Feat: Add qwq model support to Tongyi-Qianwen factory (#5981)
### What problem does this PR solve?

Add qwq model support to the Tongyi-Qianwen factory.

https://github.com/infiniflow/ragflow/issues/5869

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: zhaozhicheng <zhicheng.zhao@fastonetech.com>
parent c57f16d16f
commit 6e13922bdc
**conf/llm_factories.json**

```diff
@@ -134,6 +134,18 @@
                 "max_tokens": 32768,
                 "model_type": "chat"
             },
+            {
+                "llm_name": "qwq-32b",
+                "tags": "LLM,CHAT,128k",
+                "max_tokens": 131072,
+                "model_type": "chat"
+            },
+            {
+                "llm_name": "qwq-plus",
+                "tags": "LLM,CHAT,128k",
+                "max_tokens": 131072,
+                "model_type": "chat"
+            },
             {
                 "llm_name": "qwen-long",
                 "tags": "LLM,CHAT,10000K",
@@ -3259,7 +3271,7 @@
                 "tags": "TEXT EMBEDDING,32000",
                 "max_tokens": 32000,
                 "model_type": "embedding"
             },
             {
                 "llm_name": "rerank-1",
                 "tags": "RE-RANK, 8000",
```
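For reference, a quick way to confirm the new entries are picked up is to load the factory config and filter for the qwq models. This is a minimal sketch, not part of the PR; it assumes the `factory_llm_infos`/`llm` layout used by `conf/llm_factories.json`.

```python
# Minimal sketch (not part of this PR): list the qwq chat models the
# Tongyi-Qianwen factory exposes after this change. Assumes models live
# under factory_llm_infos[*].llm in conf/llm_factories.json.
import json

with open("conf/llm_factories.json", encoding="utf-8") as f:
    factories = json.load(f)["factory_llm_infos"]

tongyi = next(f for f in factories if f["name"] == "Tongyi-Qianwen")
for model in tongyi["llm"]:
    if model["llm_name"].startswith("qwq"):
        # Expected after this commit: qwq-32b and qwq-plus, both chat
        # models with a 131072-token context.
        print(model["llm_name"], model["max_tokens"], model["model_type"])
```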
**rag/llm/chat_model.py**

```diff
@@ -268,13 +268,13 @@ class QWenChat(Base):
         import dashscope
         dashscope.api_key = key
         self.model_name = model_name
-        if model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             super().__init__(key, model_name, "https://dashscope.aliyuncs.com/compatible-mode/v1")
 
     def chat(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat(system, history, gen_conf)
 
         stream_flag = str(os.environ.get('QWEN_CHAT_BY_STREAM', 'true')).lower() == 'true'
@@ -348,11 +348,19 @@ class QWenChat(Base):
     def chat_streamly(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.model_name.lower().find("deepseek") >= 0:
+        if self.is_reasoning_model(self.model_name):
             return super().chat_streamly(system, history, gen_conf)
 
         return self._chat_streamly(system, history, gen_conf)
 
+    @staticmethod
+    def is_reasoning_model(model_name: str) -> bool:
+        return any([
+            model_name.lower().find("deepseek") >= 0,
+            model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
+        ])
+
+
 
 class ZhipuChat(Base):
     def __init__(self, key, model_name="glm-3-turbo", **kwargs):
@@ -740,7 +748,7 @@ class BedrockChat(Base):
         self.bedrock_sk = json.loads(key).get('bedrock_sk', '')
         self.bedrock_region = json.loads(key).get('bedrock_region', '')
         self.model_name = model_name
 
         if self.bedrock_ak == '' or self.bedrock_sk == '' or self.bedrock_region == '':
             # Try to create a client using the default credentials (AWS_PROFILE, AWS_DEFAULT_REGION, etc.)
             self.client = boto3.client('bedrock-runtime')
```
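The routing change boils down to the new `is_reasoning_model` predicate: any DashScope model whose name contains `deepseek` or `qwq` (except `qwq-32b-preview`) is now served through the OpenAI-compatible endpoint instead of the native dashscope SDK path. Below is a standalone copy of that predicate with a few illustrative spot checks; the model names in the assertions are examples, not an exhaustive list.

```python
# Standalone copy of the predicate added in this commit, with spot checks.
def is_reasoning_model(model_name: str) -> bool:
    return any([
        model_name.lower().find("deepseek") >= 0,
        model_name.lower().find("qwq") >= 0 and model_name.lower() != 'qwq-32b-preview',
    ])

assert is_reasoning_model("qwq-32b")              # new: OpenAI-compatible route
assert is_reasoning_model("qwq-plus")             # new: OpenAI-compatible route
assert is_reasoning_model("deepseek-r1")          # pre-existing behavior, unchanged
assert not is_reasoning_model("qwq-32b-preview")  # explicitly excluded from the new route
assert not is_reasoning_model("qwen-long")        # regular Tongyi models keep the SDK path
```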