From aae29e72ae06d47a54099a25cf2f52d265d17fcd Mon Sep 17 00:00:00 2001
From: Tao Wang <74752235+taowang1993@users.noreply.github.com>
Date: Sun, 24 Nov 2024 19:03:53 -0800
Subject: [PATCH] Fix Deepseek Function/Tool Calling (#11023)

---
 .../deepseek/llm/deepseek-chat.yaml           |  7 +-
 .../deepseek/llm/deepseek-coder.yaml          |  1 +
 .../model_providers/deepseek/llm/llm.py       | 98 ++-----------------
 3 files changed, 15 insertions(+), 91 deletions(-)

diff --git a/api/core/model_runtime/model_providers/deepseek/llm/deepseek-chat.yaml b/api/core/model_runtime/model_providers/deepseek/llm/deepseek-chat.yaml
index 4973ac8ad6..0bbd27ad74 100644
--- a/api/core/model_runtime/model_providers/deepseek/llm/deepseek-chat.yaml
+++ b/api/core/model_runtime/model_providers/deepseek/llm/deepseek-chat.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - tool-call
   - multi-tool-call
   - stream-tool-call
 model_properties:
@@ -72,7 +73,7 @@ parameter_rules:
       - text
       - json_object
 pricing:
-  input: '1'
-  output: '2'
-  unit: '0.000001'
+  input: "1"
+  output: "2"
+  unit: "0.000001"
   currency: RMB
diff --git a/api/core/model_runtime/model_providers/deepseek/llm/deepseek-coder.yaml b/api/core/model_runtime/model_providers/deepseek/llm/deepseek-coder.yaml
index caafeadadd..97310e76b9 100644
--- a/api/core/model_runtime/model_providers/deepseek/llm/deepseek-coder.yaml
+++ b/api/core/model_runtime/model_providers/deepseek/llm/deepseek-coder.yaml
@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - tool-call
   - multi-tool-call
   - stream-tool-call
 model_properties:
diff --git a/api/core/model_runtime/model_providers/deepseek/llm/llm.py b/api/core/model_runtime/model_providers/deepseek/llm/llm.py
index 6d0a3ee262..610dc7b458 100644
--- a/api/core/model_runtime/model_providers/deepseek/llm/llm.py
+++ b/api/core/model_runtime/model_providers/deepseek/llm/llm.py
@@ -1,18 +1,17 @@
 from collections.abc import Generator
 from typing import Optional, Union
-from urllib.parse import urlparse
 
-import tiktoken
+from yarl import URL
 
-from core.model_runtime.entities.llm_entities import LLMResult
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
 from core.model_runtime.entities.message_entities import (
     PromptMessage,
     PromptMessageTool,
 )
-from core.model_runtime.model_providers.openai.llm.llm import OpenAILargeLanguageModel
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
 
 
-class DeepSeekLargeLanguageModel(OpenAILargeLanguageModel):
+class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
     def _invoke(
         self,
         model: str,
@@ -25,92 +24,15 @@ class DeepSeekLargeLanguageModel(OpenAILargeLanguageModel):
         user: Optional[str] = None,
     ) -> Union[LLMResult, Generator]:
         self._add_custom_parameters(credentials)
-
-        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
         self._add_custom_parameters(credentials)
         super().validate_credentials(model, credentials)
 
-    # refactored from openai model runtime, use cl100k_base for calculate token number
-    def _num_tokens_from_string(self, model: str, text: str, tools: Optional[list[PromptMessageTool]] = None) -> int:
-        """
-        Calculate num tokens for text completion model with tiktoken package.
-
-        :param model: model name
-        :param text: prompt text
-        :param tools: tools for tool calling
-        :return: number of tokens
-        """
-        encoding = tiktoken.get_encoding("cl100k_base")
-        num_tokens = len(encoding.encode(text))
-
-        if tools:
-            num_tokens += self._num_tokens_for_tools(encoding, tools)
-
-        return num_tokens
-
-    # refactored from openai model runtime, use cl100k_base for calculate token number
-    def _num_tokens_from_messages(
-        self, model: str, messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None
-    ) -> int:
-        """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
-
-        Official documentation: https://github.com/openai/openai-cookbook/blob/
-        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
-        encoding = tiktoken.get_encoding("cl100k_base")
-        tokens_per_message = 3
-        tokens_per_name = 1
-
-        num_tokens = 0
-        messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages]
-        for message in messages_dict:
-            num_tokens += tokens_per_message
-            for key, value in message.items():
-                # Cast str(value) in case the message value is not a string
-                # This occurs with function messages
-                # TODO: The current token calculation method for the image type is not implemented,
-                #  which need to download the image and then get the resolution for calculation,
-                #  and will increase the request delay
-                if isinstance(value, list):
-                    text = ""
-                    for item in value:
-                        if isinstance(item, dict) and item["type"] == "text":
-                            text += item["text"]
-
-                    value = text
-
-                if key == "tool_calls":
-                    for tool_call in value:
-                        for t_key, t_value in tool_call.items():
-                            num_tokens += len(encoding.encode(t_key))
-                            if t_key == "function":
-                                for f_key, f_value in t_value.items():
-                                    num_tokens += len(encoding.encode(f_key))
-                                    num_tokens += len(encoding.encode(f_value))
-                            else:
-                                num_tokens += len(encoding.encode(t_key))
-                                num_tokens += len(encoding.encode(t_value))
-                else:
-                    num_tokens += len(encoding.encode(str(value)))
-
-                if key == "name":
-                    num_tokens += tokens_per_name
-
-        # every reply is primed with <im_start>assistant
-        num_tokens += 3
-
-        if tools:
-            num_tokens += self._num_tokens_for_tools(encoding, tools)
-
-        return num_tokens
-
     @staticmethod
-    def _add_custom_parameters(credentials: dict) -> None:
-        credentials["mode"] = "chat"
-        credentials["openai_api_key"] = credentials["api_key"]
-        if "endpoint_url" not in credentials or credentials["endpoint_url"] == "":
-            credentials["openai_api_base"] = "https://api.deepseek.com"
-        else:
-            parsed_url = urlparse(credentials["endpoint_url"])
-            credentials["openai_api_base"] = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    def _add_custom_parameters(credentials: dict) -> None:
+        credentials["endpoint_url"] = str(URL(credentials.get("endpoint_url") or "https://api.deepseek.com"))
+        credentials["mode"] = LLMMode.CHAT.value
+        credentials["function_calling_type"] = "tool_call"
+        credentials["stream_function_calling"] = "support"