From 176d91937db8209696854b9d23de69d22bdcc97c Mon Sep 17 00:00:00 2001 From: Oliver Lee Date: Fri, 31 May 2024 14:19:33 +0800 Subject: [PATCH] fix 'NoneType' and new ContentType supported. (#4818) --- api/core/rag/extractor/extract_processor.py | 2 +- api/core/tools/utils/web_reader_tool.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index 39dc0996ac..09d192d410 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -29,7 +29,7 @@ from core.rag.models.document import Document from extensions.ext_storage import storage from models.model import UploadFile -SUPPORT_URL_CONTENT_TYPES = ['application/pdf', 'text/plain'] +SUPPORT_URL_CONTENT_TYPES = ['application/pdf', 'text/plain', 'application/json'] USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" diff --git a/api/core/tools/utils/web_reader_tool.py b/api/core/tools/utils/web_reader_tool.py index 7b9aaaba3d..4c69c6eddc 100644 --- a/api/core/tools/utils/web_reader_tool.py +++ b/api/core/tools/utils/web_reader_tool.py @@ -54,7 +54,7 @@ def get_url(url: str, user_agent: str = None) -> str: if content_type: main_content_type = response.headers.get('Content-Type').split(';')[0].strip() else: - content_disposition = response.headers.get('Content-Disposition') + content_disposition = response.headers.get('Content-Disposition', '') filename_match = re.search(r'filename="([^"]+)"', content_disposition) if filename_match: filename = unquote(filename_match.group(1))