Feat: Add qwq model support to Tongyi-Qianwen factory (#5981)

### What problem does this PR solve?

Add qwq model support to the Tongyi-Qianwen factory.
Related issue: https://github.com/infiniflow/ragflow/issues/5869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)


![image](https://github.com/user-attachments/assets/49f5c6a0-ecaf-41dd-a23a-2009f854d62c)


![image](https://github.com/user-attachments/assets/93ffa303-920e-4942-8188-bcd6b7209204)


![1741774779438](https://github.com/user-attachments/assets/25f2fd1d-8640-4df0-9a08-78ee9daaa8fe)


![image](https://github.com/user-attachments/assets/4763cf6c-1f76-43c4-80ee-74dfd666a184)

Co-authored-by: zhaozhicheng <zhicheng.zhao@fastonetech.com>
kuro5989 committed 2025-03-12 18:54:15 +08:00 via GitHub
parent c57f16d16f · commit 6e13922bdc
2 changed files with 25 additions and 5 deletions

**conf/llm_factories.json**

```diff
@@ -134,6 +134,18 @@
         "max_tokens": 32768,
         "model_type": "chat"
     },
+    {
+        "llm_name": "qwq-32b",
+        "tags": "LLM,CHAT,128k",
+        "max_tokens": 131072,
+        "model_type": "chat"
+    },
+    {
+        "llm_name": "qwq-plus",
+        "tags": "LLM,CHAT,128k",
+        "max_tokens": 131072,
+        "model_type": "chat"
+    },
     {
         "llm_name": "qwen-long",
         "tags": "LLM,CHAT,10000K",
@@ -3259,7 +3271,7 @@
         "tags": "TEXT EMBEDDING,32000",
         "max_tokens": 32000,
         "model_type": "embedding"
-    },
+    },
     {
         "llm_name": "rerank-1",
         "tags": "RE-RANK, 8000",
```

**rag/llm/chat_model.py**

```diff
@@ -268,13 +268,13 @@ class QWenChat(Base):
         import dashscope
         dashscope.api_key = key
         self.model_name = model_name
-        if model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             super().__init__(key, model_name, "https://dashscope.aliyuncs.com/compatible-mode/v1")

     def chat(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat(system, history, gen_conf)

         stream_flag = str(os.environ.get('QWEN_CHAT_BY_STREAM', 'true')).lower() == 'true'
@@ -348,11 +348,19 @@ class QWenChat(Base):
     def chat_streamly(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat_streamly(system, history, gen_conf)

         return self._chat_streamly(system, history, gen_conf)

+    @staticmethod
+    def is_reasoning_model(model_name: str) -> bool:
+        return any([
+            model_name.lower().find("deepseek") >= 0,
+            model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
+        ])
+

 class ZhipuChat(Base):
     def __init__(self, key, model_name="glm-3-turbo", **kwargs):
```
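
A quick sanity check of the routing predicate (illustrative only, not in the diff): the new QwQ models and the existing DeepSeek variants take the OpenAI-compatible path, while `qwq-32b-preview` deliberately stays on the legacy dashscope path.

```python
# Illustrative check of the predicate added above (body copied from the diff).
def is_reasoning_model(model_name: str) -> bool:
    return any([
        model_name.lower().find("deepseek") >= 0,
        model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
    ])

assert is_reasoning_model("qwq-32b")               # new model -> compatible-mode URL
assert is_reasoning_model("qwq-plus")              # new model -> compatible-mode URL
assert is_reasoning_model("deepseek-r1")           # existing DeepSeek routing preserved
assert not is_reasoning_model("qwq-32b-preview")   # preview model keeps the legacy path
assert not is_reasoning_model("qwen-max")          # ordinary Qwen chat models unaffected
```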
```diff
@@ -740,7 +748,7 @@ class BedrockChat(Base):
         self.bedrock_sk = json.loads(key).get('bedrock_sk', '')
         self.bedrock_region = json.loads(key).get('bedrock_region', '')
         self.model_name = model_name
         if self.bedrock_ak == '' or self.bedrock_sk == '' or self.bedrock_region == '':
             # Try to create a client using the default credentials (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
             self.client = boto3.client('bedrock-runtime')
```
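
Unrelated to qwq but visible in the shifted Bedrock hunk above: when any credential field is empty, the client falls back to boto3's default credential chain. A hedged sketch of that pattern follows; the helper name `make_bedrock_client` is hypothetical, the boto3 calls are standard.

```python
# Sketch of the fallback described by the comment above. The helper name is
# hypothetical; the boto3 calls are standard. With no explicit keys, boto3
# resolves credentials from its default chain: environment variables,
# AWS_PROFILE / ~/.aws/credentials, then instance or container roles.
import boto3

def make_bedrock_client(ak: str = '', sk: str = '', region: str = ''):
    if ak == '' or sk == '' or region == '':
        # Default credential chain (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
        return boto3.client('bedrock-runtime')
    return boto3.client(
        'bedrock-runtime',
        aws_access_key_id=ak,
        aws_secret_access_key=sk,
        region_name=region,
    )
```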