From 1119790b027e9c469ad17f739d30df049f3bec79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=B9=9B=E9=9C=B2=E5=85=88=E7=94=9F?=
 <zhanluxianshen@163.com>
Date: Fri, 9 May 2025 16:39:16 +0800
Subject: [PATCH] Clean up RAG word_extractor: drop image_folder param, fix None check (#19397)

Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
---
 api/core/rag/extractor/word_extractor.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py
index edaa8c92fa..a4ccdcafd3 100644
--- a/api/core/rag/extractor/word_extractor.py
+++ b/api/core/rag/extractor/word_extractor.py
@@ -76,8 +76,7 @@ class WordExtractor(BaseExtractor):
         parsed = urlparse(url)
         return bool(parsed.netloc) and bool(parsed.scheme)
 
-    def _extract_images_from_docx(self, doc, image_folder):
-        os.makedirs(image_folder, exist_ok=True)
+    def _extract_images_from_docx(self, doc):
         image_count = 0
         image_map = {}
 
@@ -210,7 +209,7 @@ class WordExtractor(BaseExtractor):
 
         content = []
 
-        image_map = self._extract_images_from_docx(doc, image_folder)
+        image_map = self._extract_images_from_docx(doc)
 
         hyperlinks_url = None
         url_pattern = re.compile(r"http://[^\s+]+//|https://[^\s+]+")
@@ -225,7 +224,7 @@ class WordExtractor(BaseExtractor):
                         xml = ElementTree.XML(run.element.xml)
                         x_child = [c for c in xml.iter() if c is not None]
                         for x in x_child:
-                            if x_child is None:
+                            if x is None:
                                 continue
                             if x.tag.endswith("instrText"):
                                 if x.text is None: