diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index f730c6375..b15dd092a 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -206,6 +206,7 @@ class QWenChat(Base): if system: history.insert(0, {"role": "system", "content": system}) ans = "" + tk_count = 0 try: response = Generation.call( self.model_name, @@ -214,7 +215,6 @@ class QWenChat(Base): stream=True, **gen_conf ) - tk_count = 0 for resp in response: if resp.status_code == HTTPStatus.OK: ans = resp.output.choices[0]['message']['content'] @@ -261,6 +261,7 @@ class ZhipuChat(Base): if "presence_penalty" in gen_conf: del gen_conf["presence_penalty"] if "frequency_penalty" in gen_conf: del gen_conf["frequency_penalty"] ans = "" + tk_count = 0 try: response = self.client.chat.completions.create( model=self.model_name, @@ -268,7 +269,6 @@ class ZhipuChat(Base): stream=True, **gen_conf ) - tk_count = 0 for resp in response: if not resp.choices[0].delta.content:continue delta = resp.choices[0].delta.content @@ -439,6 +439,7 @@ class VolcEngineChat(Base): if system: history.insert(0, {"role": "system", "content": system}) ans = "" + tk_count = 0 try: req = { "parameters": {