mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 23:15:59 +08:00
Add UNSTRUCTURED_API_KEY env support (#4369)
This commit is contained in:
parent
3a51f2a778
commit
b5204111da
@@ -144,6 +144,7 @@ NOTION_INTERNAL_SECRET=you-internal-secret
|
||||
|
||||
ETL_TYPE=dify
|
||||
UNSTRUCTURED_API_URL=
|
||||
UNSTRUCTURED_API_KEY=
|
||||
|
||||
SSRF_PROXY_HTTP_URL=
|
||||
SSRF_PROXY_HTTPS_URL=
|
||||
|
@@ -365,6 +365,7 @@ class Config:
|
||||
|
||||
self.ETL_TYPE = get_env('ETL_TYPE')
|
||||
self.UNSTRUCTURED_API_URL = get_env('UNSTRUCTURED_API_URL')
|
||||
self.UNSTRUCTURED_API_KEY = get_env('UNSTRUCTURED_API_KEY')
|
||||
self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
|
||||
self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
|
||||
|
||||
|
@@ -96,6 +96,7 @@ class ExtractProcessor:
|
||||
file_extension = input_file.suffix.lower()
|
||||
etl_type = current_app.config['ETL_TYPE']
|
||||
unstructured_api_url = current_app.config['UNSTRUCTURED_API_URL']
|
||||
unstructured_api_key = current_app.config['UNSTRUCTURED_API_KEY']
|
||||
if etl_type == 'Unstructured':
|
||||
if file_extension == '.xlsx' or file_extension == '.xls':
|
||||
extractor = ExcelExtractor(file_path)
|
||||
@@ -115,7 +116,7 @@ class ExtractProcessor:
|
||||
elif file_extension == '.eml':
|
||||
extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url)
|
||||
elif file_extension == '.ppt':
|
||||
extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url)
|
||||
extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
||||
elif file_extension == '.pptx':
|
||||
extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
|
||||
elif file_extension == '.xml':
|
||||
|
@@ -17,16 +17,18 @@ class UnstructuredPPTExtractor(BaseExtractor):
|
||||
def __init__(
|
||||
self,
|
||||
file_path: str,
|
||||
api_url: str
|
||||
api_url: str,
|
||||
api_key: str
|
||||
):
|
||||
"""Initialize with file path."""
|
||||
self._file_path = file_path
|
||||
self._api_url = api_url
|
||||
self._api_key = api_key
|
||||
|
||||
def extract(self) -> list[Document]:
|
||||
from unstructured.partition.api import partition_via_api
|
||||
|
||||
elements = partition_via_api(filename=self._file_path, api_url=self._api_url)
|
||||
elements = partition_via_api(filename=self._file_path, api_url=self._api_url, api_key=self._api_key)
|
||||
text_by_page = {}
|
||||
for element in elements:
|
||||
page = element.metadata.page_number
|
||||
|
Loading…
x
Reference in New Issue
Block a user