From 2c62652ea80eb6ca108268ef866b248b7e95fedd Mon Sep 17 00:00:00 2001 From: WhiteBear <2113246791@qq.com> Date: Thu, 24 Apr 2025 11:44:10 +0800 Subject: [PATCH] <think> tag is missing. (#7256) ### What problem does this PR solve? Some models force thinking, resulting in the absence of the <think> tag in the returned content ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- agent/component/exesql.py | 2 +- agent/component/generate.py | 2 +- agent/component/keyword.py | 2 +- agentic_reasoning/deep_research.py | 4 ++-- api/db/services/dialog_service.py | 4 ++-- graphrag/general/extractor.py | 2 +- rag/prompts.py | 8 ++++---- rag/raptor.py | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/agent/component/exesql.py b/agent/component/exesql.py index d61239d4c..2c414ddb7 100644 --- a/agent/component/exesql.py +++ b/agent/component/exesql.py @@ -61,7 +61,7 @@ class ExeSQL(Generate, ABC): component_name = "ExeSQL" def _refactor(self, ans): - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL) if match: ans = match.group(1) # Query content diff --git a/agent/component/generate.py b/agent/component/generate.py index fc17f6958..7972f77d7 100644 --- a/agent/component/generate.py +++ b/agent/component/generate.py @@ -200,7 +200,7 @@ class Generate(ComponentBase): if len(msg) < 2: msg.append({"role": "user", "content": "Output: "}) ans = chat_mdl.chat(msg[0]["content"], msg[1:], self._param.gen_conf()) - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) if self._param.cite and "chunks" in retrieval_res.columns: res = self.set_cite(retrieval_res, ans) diff --git a/agent/component/keyword.py b/agent/component/keyword.py index 1e7cbacde..d7e668ae6 100644 --- a/agent/component/keyword.py +++ b/agent/component/keyword.py @@ -57,7 +57,7 @@ class KeywordExtract(Generate, ABC): ans = 
chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": query}], self._param.gen_conf()) - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) ans = re.sub(r".*keyword:", "", ans).strip() logging.debug(f"ans: {ans}") return KeywordExtract.be_output(ans) diff --git a/agentic_reasoning/deep_research.py b/agentic_reasoning/deep_research.py index 6976e9190..3f2a2d8a9 100644 --- a/agentic_reasoning/deep_research.py +++ b/agentic_reasoning/deep_research.py @@ -57,7 +57,7 @@ class DeepResearcher: msg_history[-1]["content"] += "\n\nContinues reasoning with the new information.\n" for ans in self.chat_mdl.chat_streamly(REASON_PROMPT, msg_history, {"temperature": 0.7}): - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) if not ans: continue query_think = ans @@ -142,7 +142,7 @@ class DeepResearcher: [{"role": "user", "content": f'Now you should analyze each web page and find helpful information based on the current search query "{search_query}" and previous reasoning steps.'}], {"temperature": 0.7}): - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) if not ans: continue summary_think = ans diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py index 77f362709..d2efd559e 100644 --- a/api/db/services/dialog_service.py +++ b/api/db/services/dialog_service.py @@ -400,7 +400,7 @@ def chat(dialog, messages, stream=True, **kwargs): answer = "" for ans in chat_mdl.chat_streamly(prompt + prompt4citation, msg[1:], gen_conf): if thought: - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) answer = ans delta_ans = ans[len(last_ans) :] if num_tokens_from_string(delta_ans) < 16: @@ -436,7 +436,7 @@ Please write the SQL, only SQL, without any other explanations or text. 
def get_table(): nonlocal sys_prompt, user_prompt, question, tried_times sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_prompt}], {"temperature": 0.06}) - sql = re.sub(r"<think>.*</think>", "", sql, flags=re.DOTALL) + sql = re.sub(r"^.*</think>", "", sql, flags=re.DOTALL) logging.debug(f"{question} ==> {user_prompt} get SQL: {sql}") sql = re.sub(r"[\r\n]+", " ", sql.lower()) sql = re.sub(r".*select ", "select ", sql.lower()) diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py index c86cc1a95..1bd9f2d8d 100644 --- a/graphrag/general/extractor.py +++ b/graphrag/general/extractor.py @@ -54,7 +54,7 @@ class Extractor: return response _, system_msg = message_fit_in([{"role": "system", "content": system}], int(self._llm.max_length * 0.92)) response = self._llm.chat(system_msg[0]["content"], hist, conf) - response = re.sub(r"<think>.*</think>", "", response, flags=re.DOTALL) + response = re.sub(r"^.*</think>", "", response, flags=re.DOTALL) if response.find("**ERROR**") >= 0: logging.warning(f"Extractor._chat got error. response: {response}") return "" diff --git a/rag/prompts.py b/rag/prompts.py index 4b3dd866d..d18157c09 100644 --- a/rag/prompts.py +++ b/rag/prompts.py @@ -196,7 +196,7 @@ Requirements: kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) if isinstance(kwd, tuple): kwd = kwd[0] - kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) + kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL) if kwd.find("**ERROR**") >= 0: return "" return kwd @@ -223,7 +223,7 @@ Requirements: kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.2}) if isinstance(kwd, tuple): kwd = kwd[0] - kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) + kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL) if kwd.find("**ERROR**") >= 0: return "" return kwd @@ -303,7 +303,7 @@ Output: What's the weather in Rochester on {tomorrow}? 
############### """ ans = chat_mdl.chat(prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.2}) - ans = re.sub(r"<think>.*</think>", "", ans, flags=re.DOTALL) + ans = re.sub(r"^.*</think>", "", ans, flags=re.DOTALL) return ans if ans.find("**ERROR**") < 0 else messages[-1]["content"] @@ -350,7 +350,7 @@ Output: kwd = chat_mdl.chat(prompt, msg[1:], {"temperature": 0.5}) if isinstance(kwd, tuple): kwd = kwd[0] - kwd = re.sub(r"<think>.*</think>", "", kwd, flags=re.DOTALL) + kwd = re.sub(r"^.*</think>", "", kwd, flags=re.DOTALL) if kwd.find("**ERROR**") >= 0: raise Exception(kwd) diff --git a/rag/raptor.py b/rag/raptor.py index dc8fbc70f..007f2529a 100644 --- a/rag/raptor.py +++ b/rag/raptor.py @@ -48,7 +48,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval: response = await trio.to_thread.run_sync( lambda: self._llm_model.chat(system, history, gen_conf) ) - response = re.sub(r"<think>.*</think>", "", response, flags=re.DOTALL) + response = re.sub(r"^.*</think>", "", response, flags=re.DOTALL) if response.find("**ERROR**") >= 0: raise Exception(response) set_llm_cache(self._llm_model.llm_name, system, response, history, gen_conf)