mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 23:15:59 +08:00
Add UNSTRUCTURED_API_KEY env support (#4369)
This commit is contained in:
parent
3a51f2a778
commit
b5204111da
@@ -144,6 +144,7 @@ NOTION_INTERNAL_SECRET=you-internal-secret
 
 ETL_TYPE=dify
 UNSTRUCTURED_API_URL=
+UNSTRUCTURED_API_KEY=
 
 SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
@@ -365,6 +365,7 @@ class Config:
 
 self.ETL_TYPE = get_env('ETL_TYPE')
 self.UNSTRUCTURED_API_URL = get_env('UNSTRUCTURED_API_URL')
+self.UNSTRUCTURED_API_KEY = get_env('UNSTRUCTURED_API_KEY')
 self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
 self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
 
@@ -96,6 +96,7 @@ class ExtractProcessor:
 file_extension = input_file.suffix.lower()
 etl_type = current_app.config['ETL_TYPE']
 unstructured_api_url = current_app.config['UNSTRUCTURED_API_URL']
+unstructured_api_key = current_app.config['UNSTRUCTURED_API_KEY']
 if etl_type == 'Unstructured':
 if file_extension == '.xlsx' or file_extension == '.xls':
 extractor = ExcelExtractor(file_path)
@@ -115,7 +116,7 @@ class ExtractProcessor:
 elif file_extension == '.eml':
 extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url)
 elif file_extension == '.ppt':
-extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url)
+extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url, unstructured_api_key)
 elif file_extension == '.pptx':
 extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
 elif file_extension == '.xml':
@@ -17,16 +17,18 @@ class UnstructuredPPTExtractor(BaseExtractor):
 def __init__(
 self,
 file_path: str,
-api_url: str
+api_url: str,
+api_key: str
 ):
 """Initialize with file path."""
 self._file_path = file_path
 self._api_url = api_url
+self._api_key = api_key
 
 def extract(self) -> list[Document]:
 from unstructured.partition.api import partition_via_api
 
-elements = partition_via_api(filename=self._file_path, api_url=self._api_url)
+elements = partition_via_api(filename=self._file_path, api_url=self._api_url, api_key=self._api_key)
 text_by_page = {}
 for element in elements:
 page = element.metadata.page_number
Loading…
x
Reference in New Issue
Block a user