mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-13 22:05:53 +08:00
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
This commit is contained in:
parent
e565ecdaef
commit
36cb25b341
@ -14,11 +14,11 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
|
|||||||
|
|
||||||
|
|
||||||
if dify_config.ETL_TYPE == "Unstructured":
|
if dify_config.ETL_TYPE == "Unstructured":
|
||||||
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls"]
|
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"]
|
||||||
DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
|
DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
|
||||||
if dify_config.UNSTRUCTURED_API_URL:
|
if dify_config.UNSTRUCTURED_API_URL:
|
||||||
DOCUMENT_EXTENSIONS.append("ppt")
|
DOCUMENT_EXTENSIONS.append("ppt")
|
||||||
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
|
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
|
||||||
else:
|
else:
|
||||||
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
|
DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
|
||||||
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
|
DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
|
||||||
|
@ -103,7 +103,7 @@ class ExtractProcessor:
|
|||||||
extractor = ExcelExtractor(file_path)
|
extractor = ExcelExtractor(file_path)
|
||||||
elif file_extension == ".pdf":
|
elif file_extension == ".pdf":
|
||||||
extractor = PdfExtractor(file_path)
|
extractor = PdfExtractor(file_path)
|
||||||
elif file_extension in {".md", ".markdown"}:
|
elif file_extension in {".md", ".markdown", ".mdx"}:
|
||||||
extractor = (
|
extractor = (
|
||||||
UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
||||||
if is_automatic
|
if is_automatic
|
||||||
@ -141,7 +141,7 @@ class ExtractProcessor:
|
|||||||
extractor = ExcelExtractor(file_path)
|
extractor = ExcelExtractor(file_path)
|
||||||
elif file_extension == ".pdf":
|
elif file_extension == ".pdf":
|
||||||
extractor = PdfExtractor(file_path)
|
extractor = PdfExtractor(file_path)
|
||||||
elif file_extension in {".md", ".markdown"}:
|
elif file_extension in {".md", ".markdown", ".mdx"}:
|
||||||
extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
|
extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
|
||||||
elif file_extension in {".htm", ".html"}:
|
elif file_extension in {".htm", ".html"}:
|
||||||
extractor = HtmlExtractor(file_path)
|
extractor = HtmlExtractor(file_path)
|
||||||
|
@ -36,6 +36,7 @@ const FileIcon: FC<FileIconProps> = ({
|
|||||||
return <Json className={className} />
|
return <Json className={className} />
|
||||||
case 'md':
|
case 'md':
|
||||||
case 'markdown':
|
case 'markdown':
|
||||||
|
case 'mdx':
|
||||||
return <Md className={className} />
|
return <Md className={className} />
|
||||||
case 'pdf':
|
case 'pdf':
|
||||||
return <Pdf className={className} />
|
return <Pdf className={className} />
|
||||||
|
@ -84,7 +84,7 @@ export const getFileAppearanceType = (fileName: string, fileMimetype: string) =>
|
|||||||
if (extension === 'pdf')
|
if (extension === 'pdf')
|
||||||
return FileAppearanceTypeEnum.pdf
|
return FileAppearanceTypeEnum.pdf
|
||||||
|
|
||||||
if (extension === 'md' || extension === 'markdown')
|
if (extension === 'md' || extension === 'markdown' || extension === 'mdx')
|
||||||
return FileAppearanceTypeEnum.markdown
|
return FileAppearanceTypeEnum.markdown
|
||||||
|
|
||||||
if (extension === 'xlsx' || extension === 'xls')
|
if (extension === 'xlsx' || extension === 'xls')
|
||||||
|
@ -52,7 +52,7 @@ export const getInputVars = (text: string): ValueSelector[] => {
|
|||||||
|
|
||||||
export const FILE_EXTS: Record<string, string[]> = {
|
export const FILE_EXTS: Record<string, string[]> = {
|
||||||
[SupportUploadFileTypes.image]: ['JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'],
|
[SupportUploadFileTypes.image]: ['JPG', 'JPEG', 'PNG', 'GIF', 'WEBP', 'SVG'],
|
||||||
[SupportUploadFileTypes.document]: ['TXT', 'MD', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'],
|
[SupportUploadFileTypes.document]: ['TXT', 'MD', 'MDX', 'MARKDOWN', 'PDF', 'HTML', 'XLSX', 'XLS', 'DOCX', 'CSV', 'EML', 'MSG', 'PPTX', 'PPT', 'XML', 'EPUB'],
|
||||||
[SupportUploadFileTypes.audio]: ['MP3', 'M4A', 'WAV', 'WEBM', 'AMR', 'MPGA'],
|
[SupportUploadFileTypes.audio]: ['MP3', 'M4A', 'WAV', 'WEBM', 'AMR', 'MPGA'],
|
||||||
[SupportUploadFileTypes.video]: ['MP4', 'MOV', 'MPEG', 'MPGA'],
|
[SupportUploadFileTypes.video]: ['MP4', 'MOV', 'MPEG', 'MPGA'],
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user