Fix rebuild index & CSV parsing (#2705)

Co-authored-by: luowei <redacted>
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
Charlie.Wei 2024-03-06 11:33:32 +08:00 committed by GitHub
parent 1cf5f510ed
commit fa7ba30ba3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 2 deletions

View File

@@ -186,7 +186,7 @@ class IndexingRunner:
first() first()
index_type = dataset_document.doc_form index_type = dataset_document.doc_form
index_processor = IndexProcessorFactory(index_type, processing_rule.to_dict()).init_index_processor() index_processor = IndexProcessorFactory(index_type).init_index_processor()
self._load( self._load(
index_processor=index_processor, index_processor=index_processor,
dataset=dataset, dataset=dataset,

View File

@@ -3,6 +3,7 @@ import csv
from typing import Optional from typing import Optional
from core.rag.extractor.extractor_base import BaseExtractor from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.extractor.helpers import detect_file_encodings
from core.rag.models.document import Document from core.rag.models.document import Document
@@ -36,7 +37,7 @@ class CSVExtractor(BaseExtractor):
docs = self._read_from_file(csvfile) docs = self._read_from_file(csvfile)
except UnicodeDecodeError as e: except UnicodeDecodeError as e:
if self._autodetect_encoding: if self._autodetect_encoding:
detected_encodings = detect_filze_encodings(self._file_path) detected_encodings = detect_file_encodings(self._file_path)
for encoding in detected_encodings: for encoding in detected_encodings:
try: try:
with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile: with open(self._file_path, newline="", encoding=encoding.encoding) as csvfile: