diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index cbfa858f7..3d92f378f 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -915,7 +915,7 @@ class OllamaChat(Base):
             if "frequency_penalty" in gen_conf:
                 options["frequency_penalty"] = gen_conf["frequency_penalty"]
 
-            response = self.client.chat(model=self.model_name, messages=history, options=options, keep_alive=10)
+            response = self.client.chat(model=self.model_name, messages=history, options=options)
             ans = response["message"]["content"].strip()
             token_count = response.get("eval_count", 0) + response.get("prompt_eval_count", 0)
             return ans, token_count
@@ -944,7 +944,7 @@ class OllamaChat(Base):
 
         ans = ""
         try:
-            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options, keep_alive=10)
+            response = self.client.chat(model=self.model_name, messages=history, stream=True, options=options)
             for resp in response:
                 if resp["done"]:
                     token_count = resp.get("prompt_eval_count", 0) + resp.get("eval_count", 0)
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
index e0dbea2e5..f9d4e67c1 100644
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@@ -500,8 +500,7 @@ class OllamaCV(Base):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options,
-                keep_alive=-1
+                options=options
             )
 
             ans = response["message"]["content"].strip()
@@ -531,8 +530,7 @@ class OllamaCV(Base):
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options,
-                keep_alive=-1
+                options=options
            )
             for resp in response:
                 if resp["done"]:
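
For context, keep_alive on the ollama Python client controls how long the model stays loaded in memory after a request: a duration string such as "10m", a number of seconds, 0 to unload immediately, or -1 to keep it resident indefinitely. With the parameter removed, the server-side default applies (5 minutes, or whatever OLLAMA_KEEP_ALIVE is configured to). Below is a minimal sketch of passing keep_alive only when explicitly configured, assuming the ollama package; the OLLAMA_CHAT_KEEP_ALIVE variable name and the model name are illustrative and do not appear in the diff above.

import os

from ollama import Client

client = Client(host="http://localhost:11434")  # assumed local Ollama endpoint

# Hypothetical setting; when unset, the server's own keep-alive default applies.
keep_alive = os.environ.get("OLLAMA_CHAT_KEEP_ALIVE")  # e.g. "10m", "0", "-1"

kwargs = dict(model="llama3",  # example model name
              messages=[{"role": "user", "content": "Hello"}])
if keep_alive is not None:
    kwargs["keep_alive"] = keep_alive  # only override when explicitly configured

response = client.chat(**kwargs)
print(response["message"]["content"])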