From 18d3877151cf59765e7758dc8c04f372cf397e52 Mon Sep 17 00:00:00 2001
From: takatost <takatost@users.noreply.github.com>
Date: Thu, 24 Aug 2023 13:58:34 +0800
Subject: [PATCH] feat: optimize xinference stream (#989)

---
 api/core/third_party/langchain/llms/xinference_llm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/api/core/third_party/langchain/llms/xinference_llm.py b/api/core/third_party/langchain/llms/xinference_llm.py
index 0aa73057aa..dda3659d8e 100644
--- a/api/core/third_party/langchain/llms/xinference_llm.py
+++ b/api/core/third_party/langchain/llms/xinference_llm.py
@@ -108,12 +108,12 @@ class XinferenceLLM(Xinference):
         Yields:
             A string token.
         """
-        if isinstance(model, RESTfulGenerateModelHandle):
-            streaming_response = model.generate(
+        if isinstance(model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)):
+            streaming_response = model.chat(
                 prompt=prompt, generate_config=generate_config
             )
         else:
-            streaming_response = model.chat(
+            streaming_response = model.generate(
                 prompt=prompt, generate_config=generate_config
             )