diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index 783faf59a..cbfa858f7 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -565,7 +565,7 @@ class QWenChat(Base):
 
         dashscope.api_key = key
         self.model_name = model_name
-        if self.is_reasoning_model(self.model_name):
+        if self.is_reasoning_model(self.model_name) or self.model_name in ["qwen-vl-plus", "qwen-vl-plus-latest", "qwen-vl-max", "qwen-vl-max-latest"]:
             super().__init__(key, model_name, "https://dashscope.aliyuncs.com/compatible-mode/v1")
 
     def chat_with_tools(self, system: str, history: list, gen_conf: dict) -> tuple[str, int]:
@@ -643,7 +643,7 @@ class QWenChat(Base):
     def chat(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.is_reasoning_model(self.model_name):
+        if self.is_reasoning_model(self.model_name) or self.model_name in ["qwen-vl-plus", "qwen-vl-plus-latest", "qwen-vl-max", "qwen-vl-max-latest"]:
            return super().chat(system, history, gen_conf)

        stream_flag = str(os.environ.get("QWEN_CHAT_BY_STREAM", "true")).lower() == "true"
@@ -811,7 +811,7 @@ class QWenChat(Base):
     def chat_streamly(self, system, history, gen_conf):
         if "max_tokens" in gen_conf:
             del gen_conf["max_tokens"]
-        if self.is_reasoning_model(self.model_name):
+        if self.is_reasoning_model(self.model_name) or self.model_name in ["qwen-vl-plus", "qwen-vl-plus-latest", "qwen-vl-max", "qwen-vl-max-latest"]:
             return super().chat_streamly(system, history, gen_conf)
 
         return self._chat_streamly(system, history, gen_conf)
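
Note on the change: the patch repeats the same four-model list in three places (__init__, chat, and chat_streamly), all deciding the same thing: whether to route the call through DashScope's OpenAI-compatible endpoint handled by Base. Below is a minimal, standalone sketch of how that check could be centralized. The helper name uses_openai_compatible, the constant QWEN_VL_MODELS, and the is_reasoning parameter are illustrative only; just the qwen-vl model list itself comes from the diff.

    # Sketch only; names are hypothetical, not part of this patch.
    QWEN_VL_MODELS = {
        "qwen-vl-plus",
        "qwen-vl-plus-latest",
        "qwen-vl-max",
        "qwen-vl-max-latest",
    }

    def uses_openai_compatible(model_name: str, is_reasoning: bool) -> bool:
        """Return True when the model should be served via DashScope's
        OpenAI-compatible endpoint rather than the native dashscope SDK."""
        return is_reasoning or model_name in QWEN_VL_MODELS

    # The vision-language models added by this patch take the
    # compatible-mode path even though they are not reasoning models.
    assert uses_openai_compatible("qwen-vl-max", is_reasoning=False)
    assert not uses_openai_compatible("qwen-turbo", is_reasoning=False)

Factoring the condition into one helper would keep the three call sites in sync if the qwen-vl family grows (e.g. new "-latest" aliases).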