mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 10:48:59 +08:00
parent
207080babc
commit
20b932da97
@ -69,7 +69,7 @@ class FileExtractor:
|
|||||||
else MarkdownLoader(file_path, autodetect_encoding=True)
|
else MarkdownLoader(file_path, autodetect_encoding=True)
|
||||||
elif file_extension in ['.htm', '.html']:
|
elif file_extension in ['.htm', '.html']:
|
||||||
loader = HTMLLoader(file_path)
|
loader = HTMLLoader(file_path)
|
||||||
elif file_extension in ['.docx', '.doc']:
|
elif file_extension in ['.docx']:
|
||||||
loader = Docx2txtLoader(file_path)
|
loader = Docx2txtLoader(file_path)
|
||||||
elif file_extension == '.csv':
|
elif file_extension == '.csv':
|
||||||
loader = CSVLoader(file_path, autodetect_encoding=True)
|
loader = CSVLoader(file_path, autodetect_encoding=True)
|
||||||
@ -96,7 +96,7 @@ class FileExtractor:
|
|||||||
loader = MarkdownLoader(file_path, autodetect_encoding=True)
|
loader = MarkdownLoader(file_path, autodetect_encoding=True)
|
||||||
elif file_extension in ['.htm', '.html']:
|
elif file_extension in ['.htm', '.html']:
|
||||||
loader = HTMLLoader(file_path)
|
loader = HTMLLoader(file_path)
|
||||||
elif file_extension in ['.docx', '.doc']:
|
elif file_extension in ['.docx']:
|
||||||
loader = Docx2txtLoader(file_path)
|
loader = Docx2txtLoader(file_path)
|
||||||
elif file_extension == '.csv':
|
elif file_extension == '.csv':
|
||||||
loader = CSVLoader(file_path, autodetect_encoding=True)
|
loader = CSVLoader(file_path, autodetect_encoding=True)
|
||||||
|
@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
|
|||||||
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
|
IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
|
||||||
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
|
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
|
||||||
|
|
||||||
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'doc', 'csv'] + IMAGE_EXTENSIONS
|
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
|
||||||
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
|
UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
|
||||||
'docx', 'doc', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
|
'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
|
||||||
PREVIEW_WORDS_LIMIT = 3000
|
PREVIEW_WORDS_LIMIT = 3000
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user