fix: baichuan max chunks (#1990)

2025-07-23 21:24:26 +08:00 · 2024-01-10 23:13:35 +08:00 · 2024-01-10 23:13:35 +08:00 · ebd11e7482
commit ebd11e7482
parent 94626487db
2 changed files with 85 additions and 11 deletions
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Tuple
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult, EmbeddingUsage
@ -38,6 +38,50 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
            raise ValueError('Invalid model name')
        if not api_key:
            raise CredentialsValidateFailedError('api_key is required')
        # split into chunks of batch size 16
        chunks = []
        for i in range(0, len(texts), 16):
            chunks.append(texts[i:i + 16])
        embeddings = []
        token_usage = 0
        for chunk in chunks:
            # embed the current chunk
            chunk_embeddings, chunk_usage = self.embedding(
                model=model,
                api_key=api_key,
                texts=chunk,
                user=user
            )
            embeddings.extend(chunk_embeddings)
            token_usage += chunk_usage
        result = TextEmbeddingResult(
            model=model,
            embeddings=embeddings,
            usage=self._calc_response_usage(
                model=model,
                credentials=credentials,
                tokens=token_usage
            )
        )
        return result
    def embedding(self, model: str, api_key, texts: list[str], user: Optional[str] = None) \
            -> Tuple[list[list[float]], int]:
        """
        Embed given texts
        :param model: model name
        :param api_key: API key sent as the Bearer token in the Authorization header
        :param texts: texts to embed
        :param user: unique user id
        :return: tuple of (embeddings, total tokens used)
        """
        url = self.api_base
        headers = {
            'Authorization': 'Bearer ' + api_key,
@ -85,17 +129,10 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
        except Exception as e:
            raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}")
-        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])
+        return [
            data['embedding'] for data in embeddings
        ], usage['total_tokens']
        result = TextEmbeddingResult(
            model=model,
            embeddings=[[
                float(data) for data in x['embedding']
            ] for x in embeddings],
            usage=usage
        )
        return result
    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
--- a/api/tests/integration_tests/model_runtime/baichuan/test_text_embedding.py
+++ b/api/tests/integration_tests/model_runtime/baichuan/test_text_embedding.py
@ -59,3 +59,40 @@ def test_get_num_tokens():
    )
    assert num_tokens == 2
 def test_max_chunks():
    """
    Invoke the Baichuan embedding model with 22 texts and expect one
    embedding back per input text.

    NOTE(review): 22 inputs is presumably chosen to exceed the provider's
    per-request batch size, forcing more than one chunk -- confirm against
    the provider implementation.
    """
    embedding_model = BaichuanTextEmbeddingModel()

    # Eleven "hello"/"world" pairs, i.e. 22 input texts in alternating order.
    sample_texts = ["hello", "world"] * 11
    provider_credentials = {
        'api_key': os.environ.get('BAICHUAN_API_KEY'),
    }

    response = embedding_model.invoke(
        model='baichuan-text-embedding',
        credentials=provider_credentials,
        texts=sample_texts,
    )

    assert isinstance(response, TextEmbeddingResult)
    assert len(response.embeddings) == 22