From 20b932da9758838557a9b457be344e438a73e70c Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Tue, 20 Feb 2024 16:05:09 +0800 Subject: [PATCH] del doc support (#2494) Co-authored-by: jyong --- api/core/data_loader/file_extractor.py | 4 ++-- api/services/file_service.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/core/data_loader/file_extractor.py b/api/core/data_loader/file_extractor.py index 4a6eb3654d..4741014c96 100644 --- a/api/core/data_loader/file_extractor.py +++ b/api/core/data_loader/file_extractor.py @@ -69,7 +69,7 @@ class FileExtractor: else MarkdownLoader(file_path, autodetect_encoding=True) elif file_extension in ['.htm', '.html']: loader = HTMLLoader(file_path) - elif file_extension in ['.docx', '.doc']: + elif file_extension in ['.docx']: loader = Docx2txtLoader(file_path) elif file_extension == '.csv': loader = CSVLoader(file_path, autodetect_encoding=True) @@ -96,7 +96,7 @@ class FileExtractor: loader = MarkdownLoader(file_path, autodetect_encoding=True) elif file_extension in ['.htm', '.html']: loader = HTMLLoader(file_path) - elif file_extension in ['.docx', '.doc']: + elif file_extension in ['.docx']: loader = Docx2txtLoader(file_path) elif file_extension == '.csv': loader = CSVLoader(file_path, autodetect_encoding=True) diff --git a/api/services/file_service.py b/api/services/file_service.py index 3c56e6808e..215ccf688a 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg'] IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS]) -ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'doc', 'csv'] + IMAGE_EXTENSIONS +ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', - 'docx', 'doc', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS + 'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS PREVIEW_WORDS_LIMIT = 3000 @@ -162,7 +162,7 @@ class FileService: generator = storage.load(upload_file.key, stream=True) return generator, upload_file.mime_type - + @staticmethod def get_public_image_preview(file_id: str) -> str: upload_file = db.session.query(UploadFile) \