Mirror of https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git (synced 2025-08-10 04:48:59 +08:00)
Feat: Add qwq model support to Tongyi-Qianwen factory (#5981)
### What problem does this PR solve?

Add qwq model support to the Tongyi-Qianwen factory.

https://github.com/infiniflow/ragflow/issues/5869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: zhaozhicheng <zhicheng.zhao@fastonetech.com>
parent c57f16d16f
commit 6e13922bdc
@@ -134,6 +134,18 @@
                 "max_tokens": 32768,
                 "model_type": "chat"
             },
+            {
+                "llm_name": "qwq-32b",
+                "tags": "LLM,CHAT,128k",
+                "max_tokens": 131072,
+                "model_type": "chat"
+            },
+            {
+                "llm_name": "qwq-plus",
+                "tags": "LLM,CHAT,128k",
+                "max_tokens": 131072,
+                "model_type": "chat"
+            },
             {
                 "llm_name": "qwen-long",
                 "tags": "LLM,CHAT,10000K",
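Both new entries extend the Tongyi-Qianwen model list (presumably conf/llm_factories.json; the file path is not preserved in this view) and declare a 131072-token window, which agrees with the "128k" tag since 128 × 1024 = 131072. A minimal sanity-check sketch; the helper name and the tag/limit consistency rule are ours, not part of the commit:

```python
# Minimal sketch: verify the "128k" tag agrees with max_tokens for the
# two entries added above. context_window_ok is a hypothetical helper.
def context_window_ok(entry: dict) -> bool:
    if "128k" in entry.get("tags", "").lower():
        return entry.get("max_tokens") == 128 * 1024  # 131072
    return True

new_entries = [
    {"llm_name": "qwq-32b", "tags": "LLM,CHAT,128k",
     "max_tokens": 131072, "model_type": "chat"},
    {"llm_name": "qwq-plus", "tags": "LLM,CHAT,128k",
     "max_tokens": 131072, "model_type": "chat"},
]
assert all(context_window_ok(e) for e in new_entries)
```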
@@ -3259,7 +3271,7 @@
                 "tags": "TEXT EMBEDDING,32000",
                 "max_tokens": 32000,
                 "model_type": "embedding"
-            },
+            },
             {
                 "llm_name": "rerank-1",
                 "tags": "RE-RANK, 8000",
@@ -268,13 +268,13 @@ class QWenChat(Base):
         import dashscope
         dashscope.api_key = key
         self.model_name = model_name
-        if model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             super().__init__(key, model_name, "https://dashscope.aliyuncs.com/compatible-mode/v1")
 
     def chat(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat(system, history, gen_conf)
 
         stream_flag = str(os.environ.get('QWEN_CHAT_BY_STREAM', 'true')).lower() == 'true'
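This hunk (presumably rag/llm/chat_model.py; the path is not preserved in this view) replaces the hard-coded "deepseek" substring checks with the shared is_reasoning_model predicate added later in this commit, so reasoning-capable models are initialized against DashScope's OpenAI-compatible endpoint and their chat calls fall through to the base implementation. A stripped-down sketch of that routing pattern, with a stand-in Base class rather than ragflow's real one:

```python
# Stripped-down sketch of the routing pattern; Base here is a stand-in
# for ragflow's OpenAI-compatible base class, not the real implementation.
class Base:
    def __init__(self, key, model_name, base_url):
        self.key, self.model_name, self.base_url = key, model_name, base_url

    def chat(self, system, history, gen_conf):
        return f"OpenAI-compatible request to {self.base_url} ({self.model_name})"

class QWenChat(Base):
    def __init__(self, key, model_name):
        self.model_name = model_name
        if self.is_reasoning_model(model_name):
            # Reasoning models (DeepSeek, most QwQ) use the compatible-mode URL.
            super().__init__(key, model_name,
                             "https://dashscope.aliyuncs.com/compatible-mode/v1")

    def chat(self, system, history, gen_conf):
        if self.is_reasoning_model(self.model_name):
            return super().chat(system, history, gen_conf)
        return f"native dashscope request ({self.model_name})"

    @staticmethod
    def is_reasoning_model(model_name: str) -> bool:
        name = model_name.lower()
        return "deepseek" in name or ("qwq" in name and name != "qwq-32b-preview")

print(QWenChat("sk-xxx", "qwq-plus").chat(None, [], {}))   # compatible-mode path
print(QWenChat("sk-xxx", "qwen-long").chat(None, [], {}))  # native path
```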
@@ -348,11 +348,19 @@ class QWenChat(Base):
     def chat_streamly(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat_streamly(system, history, gen_conf)
 
         return self._chat_streamly(system, history, gen_conf)
 
+    @staticmethod
+    def is_reasoning_model(model_name: str) -> bool:
+        return any([
+            model_name.lower().find("deepseek") >= 0,
+            model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
+        ])
+
+
 
 class ZhipuChat(Base):
     def __init__(self, key, model_name="glm-3-turbo", **kwargs):
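The new predicate's behavior on the model names that appear in this commit can be spot-checked directly (deepseek-r1 below is just an illustrative input; any name containing "deepseek" matches the substring test):

```python
# Behavior of the predicate added in the hunk above.
def is_reasoning_model(model_name: str) -> bool:
    return any([
        model_name.lower().find("deepseek") >= 0,
        model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
    ])

assert is_reasoning_model("qwq-32b")
assert is_reasoning_model("qwq-plus")
assert is_reasoning_model("deepseek-r1")          # illustrative; any "deepseek" name matches
assert not is_reasoning_model("qwq-32b-preview")  # explicitly excluded
assert not is_reasoning_model("qwen-long")        # plain Qwen stays on the dashscope path
```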
@@ -740,7 +748,7 @@ class BedrockChat(Base):
         self.bedrock_sk = json.loads(key).get('bedrock_sk', '')
         self.bedrock_region = json.loads(key).get('bedrock_region', '')
         self.model_name = model_name
-
+
         if self.bedrock_ak == '' or self.bedrock_sk == '' or self.bedrock_region == '':
             # Try to create a client using the default credentials (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
             self.client = boto3.client('bedrock-runtime')
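For context, the fallback shown in this hunk relies on boto3's default credential chain when no explicit keys are configured. A minimal sketch of that pattern; the explicit-credentials branch is our assumption, since this hunk only shows the fallback:

```python
import boto3

def make_bedrock_client(ak: str = '', sk: str = '', region: str = ''):
    # Sketch only: mirrors the fallback above. With empty settings, boto3
    # resolves credentials itself (env vars, AWS_PROFILE, ~/.aws/*, IAM roles).
    if ak == '' or sk == '' or region == '':
        return boto3.client('bedrock-runtime')
    # Explicit-credentials branch is assumed, not shown in this hunk.
    return boto3.client('bedrock-runtime',
                        aws_access_key_id=ak,
                        aws_secret_access_key=sk,
                        region_name=region)
```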