Fix pandas indexing method for knowledge base imports (#12637) (#12638)

Co-authored-by: CN-P5 <heibai2006@qq.com>
This commit is contained in:
CN-P5 2025-01-13 09:06:59 +08:00 committed by GitHub
parent d8f57bf899
commit cd257b91c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 6 additions and 6 deletions

View File

@@ -375,9 +375,9 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
                 result = []
                 for index, row in df.iterrows():
                     if document.doc_form == "qa_model":
-                        data = {"content": row[0], "answer": row[1]}
+                        data = {"content": row.iloc[0], "answer": row.iloc[1]}
                     else:
-                        data = {"content": row[0]}
+                        data = {"content": row.iloc[0]}
                     result.append(data)
                 if len(result) == 0:
                     raise ValueError("The CSV file is empty.")

View File

@@ -112,7 +112,7 @@ class QAIndexProcessor(BaseIndexProcessor):
        df = pd.read_csv(file)
        text_docs = []
        for index, row in df.iterrows():
-           data = Document(page_content=row[0], metadata={"answer": row[1]})
+           data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]})
            text_docs.append(data)
        if len(text_docs) == 0:
            raise ValueError("The CSV file is empty.")

View File

@@ -286,7 +286,7 @@ class AppAnnotationService:
        df = pd.read_csv(file)
        result = []
        for index, row in df.iterrows():
-           content = {"question": row[0], "answer": row[1]}
+           content = {"question": row.iloc[0], "answer": row.iloc[1]}
            result.append(content)
        if len(result) == 0:
            raise ValueError("The CSV file is empty.")

View File

@@ -77,8 +77,8 @@ def batch_create_segment_to_index_task(
                index_node_id=doc_id,
                index_node_hash=segment_hash,
                position=max_position + 1 if max_position else 1,
-               content=content,
-               word_count=len(content),
+               content=content_str,
+               word_count=len(content_str),
                tokens=tokens,
                created_by=user_id,
                indexing_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None),