diff --git a/api/apps/sdk/dataset.py b/api/apps/sdk/dataset.py
index c20669cd9..02f990603 100644
--- a/api/apps/sdk/dataset.py
+++ b/api/apps/sdk/dataset.py
@@ -69,7 +69,7 @@ def create(tenant_id):
             chunk_method:
               type: string
               enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
-                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
+                     "presentation", "picture", "one", "email", "tag"
                      ]
               description: Chunking method.
             parser_config:
@@ -105,7 +105,6 @@ def create(tenant_id):
         "presentation",
         "picture",
         "one",
-        "knowledge_graph",
         "email",
         "tag"
     ]
@@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
             chunk_method:
               type: string
               enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
-                     "presentation", "picture", "one", "knowledge_graph", "email", "tag"
+                     "presentation", "picture", "one", "email", "tag"
                      ]
               description: Updated chunking method.
             parser_config:
@@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
         "presentation",
         "picture",
         "one",
-        "knowledge_graph",
         "email",
         "tag"
     ]
diff --git a/api/settings.py b/api/settings.py
index 87b4858b9..24dc31e17 100644
--- a/api/settings.py
+++ b/api/settings.py
@@ -109,7 +109,7 @@ def init_settings():
     API_KEY = LLM.get("api_key", "")
     PARSERS = LLM.get(
         "parsers",
-        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
+        "naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")
     HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
     HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md
index f849652fa..7ebc80b01 100644
--- a/docs/references/http_api_reference.md
+++ b/docs/references/http_api_reference.md
@@ -393,7 +393,6 @@ curl --request POST \
   - `"presentation"`: Presentation
   - `"picture"`: Picture
   - `"one"`: One
-  - `"knowledge_graph"`: Knowledge Graph
     Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
   - `"email"`: Email
@@ -409,10 +408,6 @@ curl --request POST \
   - If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
     - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
   - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
-  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
-    - `"chunk_token_count"`: Defaults to `128`.
-    - `"delimiter"`: Defaults to `"\n"`.
-    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
 
 #### Response
@@ -573,8 +568,6 @@ curl --request PUT \
   - `"picture"`: Picture
   - `"one"`:One
   - `"email"`: Email
-  - `"knowledge_graph"`: Knowledge Graph
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 
 #### Response
@@ -655,7 +648,7 @@ Success:
         "id": "6e211ee0723611efa10a0242ac120007",
         "language": "English",
         "name": "mysql",
-        "chunk_method": "knowledge_graph",
+        "chunk_method": "naive",
         "parser_config": {
             "chunk_token_num": 8192,
             "delimiter": "\\n",
@@ -841,10 +834,6 @@ curl --request PUT \
   - If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
     - `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
   - If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
-  - If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
-    - `"chunk_token_count"`: Defaults to `128`.
-    - `"delimiter"`: Defaults to `"\n"`.
-    - `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
 
 #### Response
diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md
index f841104e6..bdf02b5e0 100644
--- a/docs/references/python_api_reference.md
+++ b/docs/references/python_api_reference.md
@@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
 - `"presentation"`: Presentation
 - `"picture"`: Picture
 - `"one"`: One
-- `"knowledge_graph"`: Knowledge Graph
-  Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
 - `"email"`: Email
 
 ##### parser_config
@@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
   - `"presentation"`: Presentation
   - `"picture"`: Picture
   - `"one"`: One
-  - `"knowledge_graph"`: Knowledge Graph
-    Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
   - `"email"`: Email
 - `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
   - `"chunk_method"`=`"naive"`:
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
index 5f0a9d0fb..b564c7bb8 100644
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_create_dataset.py
@@ -122,7 +122,6 @@ class TestDatasetCreation:
             ("presentation", "presentation", 0),
             ("picture", "picture", 0),
             ("one", "one", 0),
-            ("picknowledge_graphture", "knowledge_graph", 0),
             ("email", "email", 0),
             ("tag", "tag", 0),
             ("empty_chunk_method", "", 0),
diff --git a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
index 1213c5450..635ef18ab 100644
--- a/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
+++ b/sdk/python/test/test_http_api/test_dataset_mangement/test_update_dataset.py
@@ -114,14 +114,13 @@ class TestDatasetUpdate:
             ("presentation", 0, ""),
             ("picture", 0, ""),
             ("one", 0, ""),
-            ("knowledge_graph", 0, ""),
             ("email", 0, ""),
             ("tag", 0, ""),
             ("", 0, ""),
             (
                 "other_chunk_method",
                 102,
-                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
+                "'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
             ),
         ],
     )
diff --git a/sdk/python/test/test_sdk_api/t_dataset.py b/sdk/python/test/test_sdk_api/t_dataset.py
index 7163e445c..76f84be8f 100644
--- a/sdk/python/test/test_sdk_api/t_dataset.py
+++ b/sdk/python/test/test_sdk_api/t_dataset.py
@@ -38,8 +38,7 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
 def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
-    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
-                           "knowledge_graph", "email"]
+    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
     random_chunk_method = random.choice(valid_chunk_methods)
     rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)
 
@@ -47,8 +46,7 @@ def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
-    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
-                           "knowledge_graph", "email", "tag"]
+    valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
     chunk_method = "invalid_chunk_method"
     with pytest.raises(Exception) as exc_info:
         rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)
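For reviewers, the snippet below is a minimal sketch of what the trimmed `chunk_method` list means for SDK callers, modeled on the calls already used in `sdk/python/test/test_sdk_api/t_dataset.py`. The host address, API key, and dataset names are placeholder values, and the `ragflow_sdk` import is assumed from the SDK package rather than shown in this diff.

```python
# Hypothetical usage sketch (not part of this diff); placeholders for host/API key.
from ragflow_sdk import RAGFlow  # assumed SDK package import

HOST_ADDRESS = "http://127.0.0.1:9380"  # placeholder deployment address
API_KEY = "your-api-key"                # placeholder API key

rag = RAGFlow(API_KEY, HOST_ADDRESS)

# Chunk methods still accepted after this change (knowledge_graph removed):
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws",
                       "presentation", "picture", "one", "email", "tag"]

# Any remaining method continues to work as before.
dataset = rag.create_dataset("demo_dataset", chunk_method="naive")

# "knowledge_graph" is now rejected like any other unknown value; the error
# message lists the updated set, as asserted in test_update_dataset.py.
try:
    rag.create_dataset("kg_dataset", chunk_method="knowledge_graph")
except Exception as exc:
    print(exc)  # e.g. "... is not in ['naive', 'manual', ..., 'email', 'tag']"
```

The try/except mirrors `test_create_dataset_with_invalid_parameter`, which expects an exception for any value outside the valid list.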