Refa: update Anthropic models. (#6445)

### What problem does this PR solve? #6421 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
2025-07-10 22:01:46 +08:00 · 2025-03-24 12:34:57 +08:00 · 2025-03-24 12:34:57 +08:00 · 85eb3775d6
commit 85eb3775d6
parent e4c8d703b5
4 changed files with 148 additions and 23 deletions
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@ -3169,34 +3169,28 @@
            "status": "1",
            "llm": [
                {
-                    "llm_name": "claude-3-5-sonnet-20240620",
-                    "tags": "LLM,CHAT,200k",
+                    "llm_name": "claude-3-7-sonnet-20250219",
+                    "tags": "LLM,IMAGE2TEXT,200k",
                    "max_tokens": 204800,
-                    "model_type": "chat"
+                    "model_type": "image2text"
                },
                {
                    "llm_name": "claude-3-5-sonnet-20241022",
-                    "tags": "LLM,CHAT,200k",
+                    "tags": "LLM,IMAGE2TEXT,200k",
                    "max_tokens": 204800,
                    "model_type": "chat"
                },
                {
                    "llm_name": "claude-3-opus-20240229",
-                    "tags": "LLM,CHAT,200k",
-                    "max_tokens": 204800,
-                    "model_type": "chat"
-                },
-                {
-                    "llm_name": "claude-3-sonnet-20240229",
-                    "tags": "LLM,CHAT,200k",
+                    "tags": "LLM,IMAGE2TEXT,200k",
                    "max_tokens": 204800,
                    "model_type": "chat"
                },
                {
                    "llm_name": "claude-3-haiku-20240307",
-                    "tags": "LLM,CHAT,200k",
+                    "tags": "LLM,IMAGE2TEXT,200k",
                    "max_tokens": 204800,
-                    "model_type": "chat"
+                    "model_type": "image2text"
                },
                {
                    "llm_name": "claude-2.1",
@ -3209,12 +3203,6 @@
                    "tags": "LLM,CHAT,100k",
                    "max_tokens": 102400,
                    "model_type": "chat"
-                },
-                {
-                    "llm_name": "claude-3-5-sonnet-20241022",
-                    "tags": "LLM,CHAT,200k",
-                    "max_tokens": 102400,
-                    "model_type": "chat"
                }
            ]
        },
--- a/rag/llm/init.py
+++ b/rag/llm/init.py
@ -106,6 +106,7 @@ from .cv_model import (
    TogetherAICV,
    YiCV,
    HunyuanCV,
+    AnthropicCV
 )

 from .rerank_model import (
@ -198,6 +199,7 @@ CvModel = {
    "TogetherAI": TogetherAICV,
    "01.AI": YiCV,
    "Tencent Hunyuan": HunyuanCV,
+    "Anthropic": AnthropicCV,
 }

 ChatModel = {
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@ -1443,6 +1443,9 @@ class AnthropicChat(Base):
            del gen_conf["presence_penalty"]
        if "frequency_penalty" in gen_conf:
            del gen_conf["frequency_penalty"]
+        gen_conf["max_tokens"] = 8196
+        if "haiku" in self.model_name or "opus" in self.model_name:
+            gen_conf["max_tokens"] = 4096

        ans = ""
        try:
@ -1474,6 +1477,9 @@ class AnthropicChat(Base):
            del gen_conf["presence_penalty"]
        if "frequency_penalty" in gen_conf:
            del gen_conf["frequency_penalty"]
+        gen_conf["max_tokens"] = 8196
+        if "haiku" in self.model_name or "opus" in self.model_name:
+            gen_conf["max_tokens"] = 4096

        ans = ""
        total_tokens = 0
@ -1481,12 +1487,18 @@ class AnthropicChat(Base):
            response = self.client.messages.create(
                model=self.model_name,
                messages=history,
-                system=self.system,
+                system=system,
                stream=True,
                **gen_conf,
            )
            for res in response:
                if res.type == 'content_block_delta':
+                    if res.delta.type == "thinking_delta" and res.delta.thinking:
+                        if ans.find("<think>") < 0:
+                            ans += "<think>"
+                        ans = ans.replace("</think>", "")
+                        ans += res.delta.thinking + "</think>"
+                    else:
                        text = res.delta.text
                        ans += text
                        total_tokens += num_tokens_from_string(text)
--- a/rag/llm/cv_model.py
+++ b/rag/llm/cv_model.py
@ -31,6 +31,7 @@ from api.utils import get_uuid
 from api.utils.file_utils import get_project_base_directory
 from rag.nlp import is_english
 from rag.prompts import vision_llm_describe_prompt
+from rag.utils import num_tokens_from_string


 class Base(ABC):
@ -899,3 +900,125 @@ class HunyuanCV(Base):
                ],
            }
        ]
+
+
+class AnthropicCV(Base):
+    def __init__(self, key, model_name, base_url=None):
+        import anthropic
+
+        self.client = anthropic.Anthropic(api_key=key)
+        self.model_name = model_name
+        self.system = ""
+        self.max_tokens = 8192
+        if "haiku" in self.model_name or "opus" in self.model_name:
+            self.max_tokens = 4096
+
+    def prompt(self, b64, prompt):
+        return [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/jpeg",
+                            "data": b64,
+                        },
+                    },
+                    {
+                        "type": "text",
+                        "text": prompt
+                    }
+                ],
+            }
+        ]
+
+    def describe(self, image):
+        b64 = self.image2base64(image)
+        prompt = self.prompt(b64,
+                             "请用中文详细描述一下图中的内容，比如时间，地点，人物，事情，人物心情等，如果有数据请提取出数据。" if self.lang.lower() == "chinese" else
+                             "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out."
+                             )
+
+        response = self.client.messages.create(
+            model=self.model_name,
+            max_tokens=self.max_tokens,
+            messages=prompt
+        )
+        return response["content"][0]["text"].strip(), response["usage"]["input_tokens"]+response["usage"]["output_tokens"]
+
+    def describe_with_prompt(self, image, prompt=None):
+        b64 = self.image2base64(image)
+        prompt = self.prompt(b64, prompt if prompt else vision_llm_describe_prompt())
+
+        response = self.client.messages.create(
+            model=self.model_name,
+            max_tokens=self.max_tokens,
+            messages=prompt
+        )
+        return response["content"][0]["text"].strip(), response["usage"]["input_tokens"]+response["usage"]["output_tokens"]
+
+    def chat(self, system, history, gen_conf):
+        if "presence_penalty" in gen_conf:
+            del gen_conf["presence_penalty"]
+        if "frequency_penalty" in gen_conf:
+            del gen_conf["frequency_penalty"]
+        gen_conf["max_tokens"] = self.max_tokens
+
+        ans = ""
+        try:
+            response = self.client.messages.create(
+                model=self.model_name,
+                messages=history,
+                system=system,
+                stream=False,
+                **gen_conf,
+            ).to_dict()
+            ans = response["content"][0]["text"]
+            if response["stop_reason"] == "max_tokens":
+                ans += (
+                    "...\nFor the content length reason, it stopped, continue?"
+                    if is_english([ans])
+                    else "······\n由于长度的原因，回答被截断了，要继续吗？"
+                )
+            return (
+                ans,
+                response["usage"]["input_tokens"] + response["usage"]["output_tokens"],
+            )
+        except Exception as e:
+            return ans + "\n**ERROR**: " + str(e), 0
+
+    def chat_streamly(self, system, history, gen_conf):
+        if "presence_penalty" in gen_conf:
+            del gen_conf["presence_penalty"]
+        if "frequency_penalty" in gen_conf:
+            del gen_conf["frequency_penalty"]
+        gen_conf["max_tokens"] = self.max_tokens
+
+        ans = ""
+        total_tokens = 0
+        try:
+            response = self.client.messages.create(
+                model=self.model_name,
+                messages=history,
+                system=system,
+                stream=True,
+                **gen_conf,
+            )
+            for res in response:
+                if res.type == 'content_block_delta':
+                    if res.delta.type == "thinking_delta" and res.delta.thinking:
+                        if ans.find("<think>") < 0:
+                            ans += "<think>"
+                        ans = ans.replace("</think>", "")
+                        ans += res.delta.thinking + "</think>"
+                    else:
+                        text = res.delta.text
+                        ans += text
+                        total_tokens += num_tokens_from_string(text)
+                    yield ans
+        except Exception as e:
+            yield ans + "\n**ERROR**: " + str(e)
+
+        yield total_tokens