diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 98248874e2..8892f4508e 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -525,12 +525,13 @@ class IndexingRunner: documents = splitter.split_documents([text_doc]) split_documents = [] for document_node in documents: - doc_id = str(uuid.uuid4()) - hash = helper.generate_text_hash(document_node.page_content) - document_node.metadata['doc_id'] = doc_id - document_node.metadata['doc_hash'] = hash - split_documents.append(document_node) + if document_node.page_content.strip(): + doc_id = str(uuid.uuid4()) + hash = helper.generate_text_hash(document_node.page_content) + document_node.metadata['doc_id'] = doc_id + document_node.metadata['doc_hash'] = hash + split_documents.append(document_node) all_documents.extend(split_documents) # processing qa document if document_form == 'qa_model': diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index e64fa881f0..2f9844c636 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -891,6 +891,10 @@ class SegmentService: if document.doc_form == 'qa_model': if 'answer' not in args or not args['answer']: raise ValueError("Answer is required") + if not args['answer'].strip(): + raise ValueError("Answer is empty") + if 'content' not in args or not args['content'] or not args['content'].strip(): + raise ValueError("Content is empty") @classmethod def create_segment(cls, args: dict, document: Document, dataset: Dataset):