mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-14 00:55:57 +08:00
Refa: knowledge_graph chunk method is deprecated (#7220)
### What problem does this PR solve? The knowledge_graph chunk method is deprecated and should no longer be used. #7184. ### Type of change - [x] Refactoring
This commit is contained in:
parent
03672df691
commit
94181a990b
@ -69,7 +69,7 @@ def create(tenant_id):
|
||||
chunk_method:
|
||||
type: string
|
||||
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
|
||||
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
|
||||
"presentation", "picture", "one", "email", "tag"
|
||||
]
|
||||
description: Chunking method.
|
||||
parser_config:
|
||||
@ -105,7 +105,6 @@ def create(tenant_id):
|
||||
"presentation",
|
||||
"picture",
|
||||
"one",
|
||||
"knowledge_graph",
|
||||
"email",
|
||||
"tag"
|
||||
]
|
||||
@ -315,7 +314,7 @@ def update(tenant_id, dataset_id):
|
||||
chunk_method:
|
||||
type: string
|
||||
enum: ["naive", "manual", "qa", "table", "paper", "book", "laws",
|
||||
"presentation", "picture", "one", "knowledge_graph", "email", "tag"
|
||||
"presentation", "picture", "one", "email", "tag"
|
||||
]
|
||||
description: Updated chunking method.
|
||||
parser_config:
|
||||
@ -353,7 +352,6 @@ def update(tenant_id, dataset_id):
|
||||
"presentation",
|
||||
"picture",
|
||||
"one",
|
||||
"knowledge_graph",
|
||||
"email",
|
||||
"tag"
|
||||
]
|
||||
|
@ -109,7 +109,7 @@ def init_settings():
|
||||
API_KEY = LLM.get("api_key", "")
|
||||
PARSERS = LLM.get(
|
||||
"parsers",
|
||||
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,knowledge_graph:Knowledge Graph,email:Email,tag:Tag")
|
||||
"naive:General,qa:Q&A,resume:Resume,manual:Manual,table:Table,paper:Paper,book:Book,laws:Laws,presentation:Presentation,picture:Picture,one:One,audio:Audio,email:Email,tag:Tag")
|
||||
|
||||
HOST_IP = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("host", "127.0.0.1")
|
||||
HOST_PORT = get_base_config(RAG_FLOW_SERVICE_NAME, {}).get("http_port")
|
||||
|
@ -393,7 +393,6 @@ curl --request POST \
|
||||
- `"presentation"`: Presentation
|
||||
- `"picture"`: Picture
|
||||
- `"one"`: One
|
||||
- `"knowledge_graph"`: Knowledge Graph
|
||||
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
|
||||
- `"email"`: Email
|
||||
|
||||
@ -409,10 +408,6 @@ curl --request POST \
|
||||
- If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
|
||||
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
|
||||
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
|
||||
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
|
||||
- `"chunk_token_count"`: Defaults to `128`.
|
||||
- `"delimiter"`: Defaults to `"\n"`.
|
||||
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
|
||||
|
||||
#### Response
|
||||
|
||||
@ -573,8 +568,6 @@ curl --request PUT \
|
||||
- `"picture"`: Picture
|
||||
- `"one"`:One
|
||||
- `"email"`: Email
|
||||
- `"knowledge_graph"`: Knowledge Graph
|
||||
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
|
||||
|
||||
#### Response
|
||||
|
||||
@ -655,7 +648,7 @@ Success:
|
||||
"id": "6e211ee0723611efa10a0242ac120007",
|
||||
"language": "English",
|
||||
"name": "mysql",
|
||||
"chunk_method": "knowledge_graph",
|
||||
"chunk_method": "naive",
|
||||
"parser_config": {
|
||||
"chunk_token_num": 8192,
|
||||
"delimiter": "\\n",
|
||||
@ -841,10 +834,6 @@ curl --request PUT \
|
||||
- If `"chunk_method"` is `"qa"`, `"manuel"`, `"paper"`, `"book"`, `"laws"`, or `"presentation"`, the `"parser_config"` object contains the following attribute:
|
||||
- `"raptor"`: Raptor-specific settings. Defaults to: `{"use_raptor": false}`.
|
||||
- If `"chunk_method"` is `"table"`, `"picture"`, `"one"`, or `"email"`, `"parser_config"` is an empty JSON object.
|
||||
- If `"chunk_method"` is `"knowledge_graph"`, the `"parser_config"` object contains the following attributes:
|
||||
- `"chunk_token_count"`: Defaults to `128`.
|
||||
- `"delimiter"`: Defaults to `"\n"`.
|
||||
- `"entity_types"`: Defaults to `["organization","person","location","event","time"]`
|
||||
|
||||
#### Response
|
||||
|
||||
|
@ -145,8 +145,6 @@ The chunking method of the dataset to create. Available options:
|
||||
- `"presentation"`: Presentation
|
||||
- `"picture"`: Picture
|
||||
- `"one"`: One
|
||||
- `"knowledge_graph"`: Knowledge Graph
|
||||
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
|
||||
- `"email"`: Email
|
||||
|
||||
##### parser_config
|
||||
@ -398,8 +396,6 @@ A dictionary representing the attributes to update, with the following keys:
|
||||
- `"presentation"`: Presentation
|
||||
- `"picture"`: Picture
|
||||
- `"one"`: One
|
||||
- `"knowledge_graph"`: Knowledge Graph
|
||||
Ensure your LLM is properly configured on the **Settings** page before selecting this. Please also note that Knowledge Graph consumes a large number of Tokens!
|
||||
- `"email"`: Email
|
||||
- `"parser_config"`: `dict[str, Any]` The parsing configuration for the document. Its attributes vary based on the selected `"chunk_method"`:
|
||||
- `"chunk_method"`=`"naive"`:
|
||||
|
@ -122,7 +122,6 @@ class TestDatasetCreation:
|
||||
("presentation", "presentation", 0),
|
||||
("picture", "picture", 0),
|
||||
("one", "one", 0),
|
||||
("picknowledge_graphture", "knowledge_graph", 0),
|
||||
("email", "email", 0),
|
||||
("tag", "tag", 0),
|
||||
("empty_chunk_method", "", 0),
|
||||
|
@ -114,14 +114,13 @@ class TestDatasetUpdate:
|
||||
("presentation", 0, ""),
|
||||
("picture", 0, ""),
|
||||
("one", 0, ""),
|
||||
("knowledge_graph", 0, ""),
|
||||
("email", 0, ""),
|
||||
("tag", 0, ""),
|
||||
("", 0, ""),
|
||||
(
|
||||
"other_chunk_method",
|
||||
102,
|
||||
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'knowledge_graph', 'email', 'tag']",
|
||||
"'other_chunk_method' is not in ['naive', 'manual', 'qa', 'table', 'paper', 'book', 'laws', 'presentation', 'picture', 'one', 'email', 'tag']",
|
||||
),
|
||||
],
|
||||
)
|
||||
|
@ -38,8 +38,7 @@ def test_create_dataset_with_duplicated_name(get_api_key_fixture):
|
||||
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
|
||||
"knowledge_graph", "email"]
|
||||
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email"]
|
||||
random_chunk_method = random.choice(valid_chunk_methods)
|
||||
rag.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=random_chunk_method)
|
||||
|
||||
@ -47,8 +46,7 @@ def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
|
||||
def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
|
||||
API_KEY = get_api_key_fixture
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
|
||||
"knowledge_graph", "email", "tag"]
|
||||
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one", "email", "tag"]
|
||||
chunk_method = "invalid_chunk_method"
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
rag.create_dataset("test_create_dataset_with_invalid_chunk_method", chunk_method=chunk_method)
|
||||
|
Loading…
x
Reference in New Issue
Block a user