Refa: knowledge_graph chunk method is deprecated (#7220)

### What problem does this PR solve?

The knowledge_graph chunk method is deprecated and should no longer be used. #7184.

### Type of change

- [x] Refactoring
2025-08-14 00:55:57 +08:00 · 2025-04-23 13:01:46 +08:00 · 2025-04-23 13:01:46 +08:00 · 94181a990b
commit 94181a990b
parent 03672df691
7 changed files with 7 additions and 28 deletions
--- a/api/apps/sdk/dataset.py
+++ b/api/apps/sdk/dataset.py
@@ -69,7 +69,7 @@ def create(tenant_id):
            chunk_method:
              type: string
              enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
-                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
+                     "presentation", "picture", "one", "email", "tag"
                     ]
              description: Chunking method.
            parser_config:
@@ -105,7 +105,6 @@ def create(tenant_id):
        "presentation",
        "picture",
        "one",
-        "knowledge_graph",
        "email",
        "tag"
    ]
@@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
            chunk_method:
              type: string
              enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
-                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
+                     "presentation", "picture", "one", "email", "tag"
                     ]
              description: Updated chunking method.
            parser_config:
@@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
        "presentation",
        "picture",
        "one",
-        "knowledge_graph",
        "email",
        "tag"
    ]
--- a/api/settings.py
+++ b/api/settings.py
@@ -109,7 +109,7 @@ def init_settings():
    API_KEY = LLM.get("api_key", "")
    PARSERS = LLM.get(
        "parsers",
-        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
+        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")

    HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
    HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -393,7 +393,6 @@ curl --request POST \
  - `"presentation"`: Presentation
  - `"picture"`: Picture
  - `"one"`: One
-  - `"knowledge_graph"`: Knowledge Graph  
    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
  - `"email"`: Email

@@ -409,10 +408,6 @@ curl --request POST \
  - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:  
    - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
  - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
-  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:  
-    - `"chunk_token_count"`: Defaults to `128`.
-    - `"delimiter"`: Defaults to `"\n"`.
-    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`

 #### Response

@@ -573,8 +568,6 @@ curl --request PUT \
  - `"picture"`: Picture
  - `"one"`: One
  - `"email"`: Email
-  - `"knowledge_graph"`: Knowledge Graph  
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!

 #### Response

@@ -655,7 +648,7 @@ Success:
            "id": "6e211ee0723611efa10a0242ac120007",
            "language": "English",
            "name": "mysql",
-            "chunk_method": "knowledge_graph",
+            "chunk_method": "naive",
            "parser_config": {
                "chunk_token_num": 8192,
                "delimiter": "\\n",
@@ -841,10 +834,6 @@ curl --request PUT \
  - If `"chunk_method"` is `"qa"`, `"manual"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
    - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
  - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
-  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
-    - `"chunk_token_count"`: Defaults to `128`.
-    - `"delimiter"`: Defaults to `"\n"`.
-    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`

 #### Response

--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
 - `"presentation"`: Presentation
 - `"picture"`: Picture
 - `"one"`: One
-- `"knowledge_graph"`: Knowledge Graph  
-  Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 - `"email"`: Email

 ##### parser_config
@@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
  - `"presentation"`: Presentation
  - `"picture"`: Picture
  - `"one"`: One
-  - `"knowledge_graph"`: Knowledge Graph  
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
  - `"email"`: Email
 - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
  - `"chunk_method"`=`"naive"`:  
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
@@ -122,7 +122,6 @@ class TestDatasetCreation:
            ("presentation", "presentation", 0),
            ("picture", "picture", 0),
            ("one", "one", 0),
-            ("picknowledge_graphture", "knowledge_graph", 0),
            ("email", "email", 0),
            ("tag", "tag", 0),
            ("empty_chunk_method", "", 0),
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
@@ -114,14 +114,13 @@ class TestDatasetUpdate:
            ("presentation", 0, ""),
            ("picture", 0, ""),
            ("one", 0, ""),
-            ("knowledge_graph", 0, ""),
            ("email", 0, ""),
            ("tag", 0, ""),
            ("", 0, ""),
            (
                "other_chunk_method",
                102,
-                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
+                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
            ),
        ],
    )
--- a/sdk/python/test/test_sdk_api/t_dataset.py
+++ b/sdk/python/test/test_sdk_api/t_dataset.py
@@ -38,8 +38,7 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
 def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
-    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
-                           "knowledge_graph", "email"]
+    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
    random_chunk_method = random.choice(valid_chunk_methods)
    rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)

@@ -47,8 +46,7 @@ def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
 def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
-    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
-                           "knowledge_graph", "email", "tag"]
+    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
    chunk_method = "invalid_chunk_method"
    with pytest.raises(Exception) as exc_info:
        rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)