From 1024fc623efd19389742c5c1afa49ebf0a35a342 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:22:07 +0800 Subject: [PATCH] fix the ssrf of docx file extractor external images (#10237) --- api/core/rag/extractor/word_extractor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index ae3c25125c..d4434ea28f 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -14,6 +14,7 @@ import requests from docx import Document as DocxDocument from configs import dify_config +from core.helper import ssrf_proxy from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document from extensions.ext_database import db @@ -86,7 +87,7 @@ class WordExtractor(BaseExtractor): image_count += 1 if rel.is_external: url = rel.reltype - response = requests.get(url, stream=True) + response = ssrf_proxy.get(url, stream=True) if response.status_code == 200: image_ext = mimetypes.guess_extension(response.headers["Content-Type"]) file_uuid = str(uuid.uuid4())