Refa: knowledge_graph chunk method is deprecated (#7220)

### What problem does this PR solve?

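The `knowledge_graph` chunk method is deprecated and should no longer be
used, so it is removed from the dataset API enums, the default parser list,
the API reference documentation, and the tests. See #7184.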

### Type of change

- [x] Refactoring
This commit is contained in:
Yongteng Lei 2025-04-23 13:01:46 +08:00 committed by GitHub
parent 03672df691
commit 94181a990b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 7 additions and 28 deletions

View File

@ -69,7 +69,7 @@ def create(tenant_id):
chunk_method:
type: string
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
"presentation", "picture", "one", "email", "tag"
]
description: Chunking method.
parser_config:
@ -105,7 +105,6 @@ def create(tenant_id):
"presentation",
"picture",
"one",
"knowledge_graph",
"email",
"tag"
]
@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
chunk_method:
type: string
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
"presentation", "picture", "one", "email", "tag"
]
description: Updated chunking method.
parser_config:
@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
"presentation",
"picture",
"one",
"knowledge_graph",
"email",
"tag"
]

View File

@ -109,7 +109,7 @@ def init_settings():
API_KEY = LLM.get("api_key", "")
PARSERS = LLM.get(
"parsers",
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")
HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")

View File

@ -393,7 +393,6 @@ curl --request POST \
- `"presentation"`: Presentation
- `"picture"`: Picture
- `"one"`: One
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
- `"email"`: Email
@ -409,10 +408,6 @@ curl --request POST \
- If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
- `"chunk_token_count"`: Defaults to `128`.
- `"delimiter"`: Defaults to `"\n"`.
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
#### Response
@ -573,8 +568,6 @@ curl --request PUT \
- `"picture"`: Picture
- `"one"`: One
- `"email"`: Email
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
#### Response
@ -655,7 +648,7 @@ Success:
"id": "6e211ee0723611efa10a0242ac120007",
"language": "English",
"name": "mysql",
"chunk_method": "knowledge_graph",
"chunk_method": "naive",
"parser_config": {
"chunk_token_num": 8192,
"delimiter": "\\n",
@ -841,10 +834,6 @@ curl --request PUT \
- If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
- `"chunk_token_count"`: Defaults to `128`.
- `"delimiter"`: Defaults to `"\n"`.
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
#### Response

View File

@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
- `"presentation"`: Presentation
- `"picture"`: Picture
- `"one"`: One
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
- `"email"`: Email
##### parser_config
@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
- `"presentation"`: Presentation
- `"picture"`: Picture
- `"one"`: One
- `"knowledge_graph"`: Knowledge Graph
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
- `"email"`: Email
- `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
- `"chunk_method"`=`"naive"`:

View File

@ -122,7 +122,6 @@ class TestDatasetCreation:
("presentation", "presentation", 0),
("picture", "picture", 0),
("one", "one", 0),
("picknowledge_graphture", "knowledge_graph", 0),
("email", "email", 0),
("tag", "tag", 0),
("empty_chunk_method", "", 0),

View File

@ -114,14 +114,13 @@ class TestDatasetUpdate:
("presentation", 0, ""),
("picture", 0, ""),
("one", 0, ""),
("knowledge_graph", 0, ""),
("email", 0, ""),
("tag", 0, ""),
("", 0, ""),
(
"other_chunk_method",
102,
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
),
],
)

View File

@ -38,8 +38,7 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
"knowledge_graph", "email"]
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
random_chunk_method = random.choice(valid_chunk_methods)
rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)
@ -47,8 +46,7 @@ def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
"knowledge_graph", "email", "tag"]
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
chunk_method = "invalid_chunk_method"
with pytest.raises(Exception) as exc_info:
rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)