mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 20:59:00 +08:00
Fix: Add Knowledge Base Document Parsing Status Check (#5966)
When creating and updating chats, add a check for the parsing status of knowledge base documents. Ensure that all documents have been parsed before allowing chat creation to improve user experience and system stability. **Main Changes:** - Add document parsing status check logic in `chat.py`. - Implement the `is_parsed_done` method in `knowledgebase_service.py`. - Prevent chat creation when documents are being parsed or parsing has failed. ### What problem does this PR solve? Fixes this bug: https://github.com/infiniflow/ragflow/issues/5960 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) Co-authored-by: wenju.li <wenju.li@deepctr.cn>
This commit is contained in:
parent
41c67ce8dd
commit
e3ea4b7ec2
@ -40,6 +40,12 @@ def create(tenant_id):
|
|||||||
kb = kbs[0]
|
kb = kbs[0]
|
||||||
if kb.chunk_num == 0:
|
if kb.chunk_num == 0:
|
||||||
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
||||||
|
|
||||||
|
# Check if all documents in the knowledge base have been parsed
|
||||||
|
is_done, error_msg = KnowledgebaseService.is_parsed_done(kb_id)
|
||||||
|
if not is_done:
|
||||||
|
return get_error_data_result(error_msg)
|
||||||
|
|
||||||
kbs = KnowledgebaseService.get_by_ids(ids) if ids else []
|
kbs = KnowledgebaseService.get_by_ids(ids) if ids else []
|
||||||
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
|
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
|
||||||
embd_count = list(set(embd_ids))
|
embd_count = list(set(embd_ids))
|
||||||
@ -176,6 +182,12 @@ def update(tenant_id, chat_id):
|
|||||||
kb = kbs[0]
|
kb = kbs[0]
|
||||||
if kb.chunk_num == 0:
|
if kb.chunk_num == 0:
|
||||||
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
||||||
|
|
||||||
|
# Check if all documents in the knowledge base have been parsed
|
||||||
|
is_done, error_msg = KnowledgebaseService.is_parsed_done(kb_id)
|
||||||
|
if not is_done:
|
||||||
|
return get_error_data_result(error_msg)
|
||||||
|
|
||||||
kbs = KnowledgebaseService.get_by_ids(ids)
|
kbs = KnowledgebaseService.get_by_ids(ids)
|
||||||
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
|
embd_ids = [TenantLLMService.split_model_name_and_factory(kb.embd_id)[0] for kb in kbs] # remove vendor suffix for comparison
|
||||||
embd_count = list(set(embd_ids))
|
embd_count = list(set(embd_ids))
|
||||||
|
@ -22,6 +22,42 @@ from peewee import fn
|
|||||||
class KnowledgebaseService(CommonService):
|
class KnowledgebaseService(CommonService):
|
||||||
model = Knowledgebase
|
model = Knowledgebase
|
||||||
|
|
||||||
|
@classmethod
@DB.connection_context()
def is_parsed_done(cls, kb_id):
    """
    Check if all documents in the knowledge base have completed parsing

    Every document of the knowledge base is inspected, page by page, so
    knowledge bases with more documents than one page are fully covered.

    Args:
        kb_id: Knowledge base ID

    Returns:
        If all documents are parsed successfully, returns (True, None)
        If the knowledge base does not exist, or any document is running,
        cancelled, failed, or unstarted with no chunks, returns
        (False, error_message)
    """
    # Kept as function-scope imports, as in the original
    # (presumably to avoid a circular import -- confirm before hoisting).
    from api.db import TaskStatus
    from api.db.services.document_service import DocumentService

    # Get knowledge base information
    kbs = cls.query(id=kb_id)
    if not kbs:
        return False, "Knowledge base not found"
    kb = kbs[0]

    page_size = 1000
    page_number = 1
    while True:
        # Fetch one page of documents; the second return value is unused here.
        docs, _ = DocumentService.get_by_kb_id(kb_id, page_number, page_size, "create_time", True, "")

        # Check parsing status of each document
        for doc in docs:
            run = doc['run']

            # If document is being parsed, don't allow chat creation
            if run == TaskStatus.RUNNING.value:
                return False, f"Document '{doc['name']}' in dataset '{kb.name}' is still being parsed. Please wait until all documents are parsed before starting a chat."

            # Cancelled or failed parsing will not finish by waiting, so
            # tell the user what actually happened instead of "please wait".
            if run == TaskStatus.CANCEL.value:
                return False, f"Parsing of document '{doc['name']}' in dataset '{kb.name}' was cancelled. Please parse all documents before starting a chat."
            if run == TaskStatus.FAIL.value:
                return False, f"Parsing of document '{doc['name']}' in dataset '{kb.name}' failed. Please re-parse it or remove it before starting a chat."

            # If document is not yet parsed and has no chunks, don't allow chat creation
            if run == TaskStatus.UNSTART.value and doc['chunk_num'] == 0:
                return False, f"Document '{doc['name']}' in dataset '{kb.name}' has not been parsed yet. Please parse all documents before starting a chat."

        # A short (or empty) page means there are no further documents.
        if len(docs) < page_size:
            break
        page_number += 1

    return True, None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def list_documents_by_ids(cls,kb_ids):
|
def list_documents_by_ids(cls,kb_ids):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user