Feat: Add qwq model support to Tongyi-Qianwen factory (#5981)

### What problem does this PR solve?

Add qwq model support to the Tongyi-Qianwen factory.
Related issue: https://github.com/infiniflow/ragflow/issues/5869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)


![image](https://github.com/user-attachments/assets/49f5c6a0-ecaf-41dd-a23a-2009f854d62c)


![image](https://github.com/user-attachments/assets/93ffa303-920e-4942-8188-bcd6b7209204)


![1741774779438](https://github.com/user-attachments/assets/25f2fd1d-8640-4df0-9a08-78ee9daaa8fe)


![image](https://github.com/user-attachments/assets/4763cf6c-1f76-43c4-80ee-74dfd666a184)

Co-authored-by: zhaozhicheng <zhicheng.zhao@fastonetech.com>
kuro5989 committed 2025-03-12 18:54:15 +08:00 via GitHub
parent c57f16d16f · commit 6e13922bdc
2 changed files with 25 additions and 5 deletions

**conf/llm_factories.json**

```diff
@@ -134,6 +134,18 @@
         "max_tokens": 32768,
         "model_type": "chat"
     },
+    {
+        "llm_name": "qwq-32b",
+        "tags": "LLM,CHAT,128k",
+        "max_tokens": 131072,
+        "model_type": "chat"
+    },
+    {
+        "llm_name": "qwq-plus",
+        "tags": "LLM,CHAT,128k",
+        "max_tokens": 131072,
+        "model_type": "chat"
+    },
     {
         "llm_name": "qwen-long",
         "tags": "LLM,CHAT,10000K",
@@ -3259,7 +3271,7 @@
         "tags": "TEXT EMBEDDING,32000",
         "max_tokens": 32000,
         "model_type": "embedding"
-    },
+    },
     {
         "llm_name": "rerank-1",
         "tags": "RE-RANK, 8000",
```

**rag/llm/chat_model.py**

```diff
@@ -268,13 +268,13 @@ class QWenChat(Base):
         import dashscope
         dashscope.api_key = key
         self.model_name = model_name
-        if model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             super().__init__(key, model_name, "https://dashscope.aliyuncs.com/compatible-mode/v1")

     def chat(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat(system, history, gen_conf)

         stream_flag = str(os.environ.get('QWEN_CHAT_BY_STREAM', 'true')).lower() == 'true'
@@ -348,11 +348,19 @@ class QWenChat(Base):
     def chat_streamly(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat_streamly(system, history, gen_conf)

         return self._chat_streamly(system, history, gen_conf)

+    @staticmethod
+    def is_reasoning_model(model_name: str) -> bool:
+        return any([
+            model_name.lower().find("deepseek") >= 0,
+            model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
+        ])
+

 class ZhipuChat(Base):
     def __init__(self, key, model_name="glm-3-turbo", **kwargs):
```
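
A quick sanity check of the routing predicate (illustrative only, not in the diff): the new QwQ models and the existing DeepSeek variants take the OpenAI-compatible path, while `qwq-32b-preview` deliberately stays on the legacy dashscope path.

```python
# Illustrative check of the predicate added above (body copied from the diff).
def is_reasoning_model(model_name: str) -> bool:
    return any([
        model_name.lower().find("deepseek") >= 0,
        model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
    ])

assert is_reasoning_model("qwq-32b")               # new model -> compatible-mode URL
assert is_reasoning_model("qwq-plus")              # new model -> compatible-mode URL
assert is_reasoning_model("deepseek-r1")           # existing DeepSeek routing preserved
assert not is_reasoning_model("qwq-32b-preview")   # preview model keeps the legacy path
assert not is_reasoning_model("qwen-max")          # ordinary Qwen chat models unaffected
```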
```diff
@@ -740,7 +748,7 @@ class BedrockChat(Base):
         self.bedrock_sk = json.loads(key).get('bedrock_sk', '')
         self.bedrock_region = json.loads(key).get('bedrock_region', '')
         self.model_name = model_name
         if self.bedrock_ak == '' or self.bedrock_sk == '' or self.bedrock_region == '':
             # Try to create a client using the default credentials (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
             self.client = boto3.client('bedrock-runtime')
```
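
Unrelated to qwq but visible in the shifted Bedrock hunk above: when any credential field is empty, the client falls back to boto3's default credential chain. A hedged sketch of that pattern follows; the helper name `make_bedrock_client` is hypothetical, the boto3 calls are standard.

```python
# Sketch of the fallback described by the comment above. The helper name is
# hypothetical; the boto3 calls are standard. With no explicit keys, boto3
# resolves credentials from its default chain: environment variables,
# AWS_PROFILE / ~/.aws/credentials, then instance or container roles.
import boto3

def make_bedrock_client(ak: str = '', sk: str = '', region: str = ''):
    if ak == '' or sk == '' or region == '':
        # Default credential chain (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
        return boto3.client('bedrock-runtime')
    return boto3.client(
        'bedrock-runtime',
        aws_access_key_id=ak,
        aws_secret_access_key=sk,
        region_name=region,
    )
```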