mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-06-30 04:05:25 +08:00
fix(document_extractor): pptx file type and missing metadata_filename UnstructuredIO (#11364)
Co-authored-by: Julian Huynh <julian.huynh@immersio.io>
This commit is contained in:
parent
1490a19fa1
commit
9277156b6c
@ -1,6 +1,8 @@
|
|||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
|
||||||
import docx
|
import docx
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -264,14 +266,20 @@ def _extract_text_from_ppt(file_content: bytes) -> str:
|
|||||||
|
|
||||||
def _extract_text_from_pptx(file_content: bytes) -> str:
|
def _extract_text_from_pptx(file_content: bytes) -> str:
|
||||||
try:
|
try:
|
||||||
with io.BytesIO(file_content) as file:
|
if dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY:
|
||||||
if dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY:
|
with tempfile.NamedTemporaryFile(suffix=".pptx", delete=False) as temp_file:
|
||||||
elements = partition_via_api(
|
temp_file.write(file_content)
|
||||||
file=file,
|
temp_file.flush()
|
||||||
api_url=dify_config.UNSTRUCTURED_API_URL,
|
with open(temp_file.name, "rb") as file:
|
||||||
api_key=dify_config.UNSTRUCTURED_API_KEY,
|
elements = partition_via_api(
|
||||||
)
|
file=file,
|
||||||
else:
|
metadata_filename=temp_file.name,
|
||||||
|
api_url=dify_config.UNSTRUCTURED_API_URL,
|
||||||
|
api_key=dify_config.UNSTRUCTURED_API_KEY,
|
||||||
|
)
|
||||||
|
os.unlink(temp_file.name)
|
||||||
|
else:
|
||||||
|
with io.BytesIO(file_content) as file:
|
||||||
elements = partition_pptx(file=file)
|
elements = partition_pptx(file=file)
|
||||||
return "\n".join([getattr(element, "text", "") for element in elements])
|
return "\n".join([getattr(element, "text", "") for element in elements])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user