From cd257b91c54231d1a4f32c1ad7ff8a0db67b586b Mon Sep 17 00:00:00 2001 From: CN-P5 Date: Mon, 13 Jan 2025 09:06:59 +0800 Subject: [PATCH] Fix pandas indexing method for knowledge base imports (#12637) (#12638) Co-authored-by: CN-P5 --- api/controllers/console/datasets/datasets_segments.py | 4 ++-- api/core/rag/index_processor/processor/qa_index_processor.py | 2 +- api/services/annotation_service.py | 2 +- api/tasks/batch_create_segment_to_index_task.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 034fe9cfe2..2dd86a1b32 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -375,9 +375,9 @@ class DatasetDocumentSegmentBatchImportApi(Resource): result = [] for index, row in df.iterrows(): if document.doc_form == "qa_model": - data = {"content": row[0], "answer": row[1]} + data = {"content": row.iloc[0], "answer": row.iloc[1]} else: - data = {"content": row[0]} + data = {"content": row.iloc[0]} result.append(data) if len(result) == 0: raise ValueError("The CSV file is empty.") diff --git a/api/core/rag/index_processor/processor/qa_index_processor.py b/api/core/rag/index_processor/processor/qa_index_processor.py index 58b50a9fcb..0055625e13 100644 --- a/api/core/rag/index_processor/processor/qa_index_processor.py +++ b/api/core/rag/index_processor/processor/qa_index_processor.py @@ -112,7 +112,7 @@ class QAIndexProcessor(BaseIndexProcessor): df = pd.read_csv(file) text_docs = [] for index, row in df.iterrows(): - data = Document(page_content=row[0], metadata={"answer": row[1]}) + data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]}) text_docs.append(data) if len(text_docs) == 0: raise ValueError("The CSV file is empty.") diff --git a/api/services/annotation_service.py b/api/services/annotation_service.py index a946405c95..45ec1e9b5a 100644 --- a/api/services/annotation_service.py +++ b/api/services/annotation_service.py @@ -286,7 +286,7 @@ class AppAnnotationService: df = pd.read_csv(file) result = [] for index, row in df.iterrows(): - content = {"question": row[0], "answer": row[1]} + content = {"question": row.iloc[0], "answer": row.iloc[1]} result.append(content) if len(result) == 0: raise ValueError("The CSV file is empty.") diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index 05a0f0a407..dbef6b708e 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -77,8 +77,8 @@ def batch_create_segment_to_index_task( index_node_id=doc_id, index_node_hash=segment_hash, position=max_position + 1 if max_position else 1, - content=content, - word_count=len(content), + content=content_str, + word_count=len(content_str), tokens=tokens, created_by=user_id, indexing_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None),