merge main

This commit is contained in:
JzoNg 2024-09-27 16:27:00 +08:00
commit 8fd04e5313
171 changed files with 1964 additions and 489 deletions

46
.github/workflows/web-tests.yml vendored Normal file
View File

@ -0,0 +1,46 @@
# Runs the web front-end test suite for pull requests against main that touch web/.
name: Web Tests

on:
  pull_request:
    branches:
      - main
    paths:
      - web/**

# Only keep the newest run per PR branch; superseded runs are cancelled.
concurrency:
  group: web-tests-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  test:
    name: Web Tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./web

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: web/**

      - name: Setup Node.js
        uses: actions/setup-node@v4
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
          node-version: 20
          cache: yarn
          # Key the dependency cache on the lockfile: the install below uses
          # --frozen-lockfile, so yarn.lock is what determines the installed tree.
          cache-dependency-path: ./web/yarn.lock

      - name: Install dependencies
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn install --frozen-lockfile

      - name: Run tests
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn test

View File

@ -53,11 +53,9 @@ from services.account_service import AccountService
warnings.simplefilter("ignore", ResourceWarning) warnings.simplefilter("ignore", ResourceWarning)
# fix windows platform os.environ["TZ"] = "UTC"
if os.name == "nt": # windows platform not support tzset
os.system('tzutil /s "UTC"') if hasattr(time, "tzset"):
else:
os.environ["TZ"] = "UTC"
time.tzset() time.tzset()

View File

@ -309,7 +309,7 @@ class AppRunner:
if not prompt_messages: if not prompt_messages:
prompt_messages = result.prompt_messages prompt_messages = result.prompt_messages
if not usage and result.delta.usage: if result.delta.usage:
usage = result.delta.usage usage = result.delta.usage
if not usage: if not usage:

View File

@ -5,6 +5,7 @@ from typing import Optional, cast
import numpy as np import numpy as np
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
for i in range(0, len(embedding_queue_texts), max_chunks): for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks] batch_texts = embedding_queue_texts[i : i + max_chunks]
embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user) embedding_result = self._model_instance.invoke_text_embedding(
texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
)
for vector in embedding_result.embeddings: for vector in embedding_result.embeddings:
try: try:
@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
redis_client.expire(embedding_cache_key, 600) redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float")) return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try: try:
embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user) embedding_result = self._model_instance.invoke_text_embedding(
texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
)
embedding_results = embedding_result.embeddings[0] embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist() embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()

View File

@ -0,0 +1,10 @@
from enum import Enum
class EmbeddingInputType(Enum):
    """
    Enum for embedding input type.

    Tells a text embedding model what kind of text it is being asked to embed.
    """

    # The texts are document content.
    DOCUMENT = "document"
    # The text is a query.
    QUERY = "query"

View File

@ -3,6 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast from typing import IO, Optional, Union, cast
from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError from core.errors.error import ProviderTokenNotInitError
@ -158,12 +159,15 @@ class ModelInstance:
tools=tools, tools=tools,
) )
def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult: def invoke_text_embedding(
self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
) -> TextEmbeddingResult:
""" """
Invoke large language model Invoke large language model
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
if not isinstance(self.model_type_instance, TextEmbeddingModel): if not isinstance(self.model_type_instance, TextEmbeddingModel):
@ -176,6 +180,7 @@ class ModelInstance:
credentials=self.credentials, credentials=self.credentials,
texts=texts, texts=texts,
user=user, user=user,
input_type=input_type,
) )
def get_text_embedding_num_tokens(self, texts: list[str]) -> int: def get_text_embedding_num_tokens(self, texts: list[str]) -> int:

View File

@ -4,6 +4,7 @@ from typing import Optional
from pydantic import ConfigDict from pydantic import ConfigDict
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel from core.model_runtime.model_providers.__base.ai_model import AIModel
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
model_config = ConfigDict(protected_namespaces=()) model_config = ConfigDict(protected_namespaces=())
def invoke( def invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke large language model Invoke text embedding model
:param model: model name :param model: model name
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
self.started_at = time.perf_counter() self.started_at = time.perf_counter()
try: try:
return self._invoke(model, credentials, texts, user) return self._invoke(model, credentials, texts, user, input_type)
except Exception as e: except Exception as e:
raise self._transform_invoke_error(e) raise self._transform_invoke_error(e)
@abstractmethod @abstractmethod
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke large language model Invoke text embedding model
:param model: model name :param model: model name
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
raise NotImplementedError raise NotImplementedError

View File

@ -7,6 +7,7 @@ import numpy as np
import tiktoken import tiktoken
from openai import AzureOpenAI from openai import AzureOpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel): class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
base_model_name = credentials["base_model_name"] base_model_name = credentials["base_model_name"]
credentials_kwargs = self._to_credential_kwargs(credentials) credentials_kwargs = self._to_credential_kwargs(credentials)
client = AzureOpenAI(**credentials_kwargs) client = AzureOpenAI(**credentials_kwargs)

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "http://api.baichuan-ai.com/v1/embeddings" api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
api_key = credentials["api_key"] api_key = credentials["api_key"]

View File

@ -13,6 +13,7 @@ from botocore.exceptions import (
UnknownServiceError, UnknownServiceError,
) )
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
class BedrockTextEmbeddingModel(TextEmbeddingModel): class BedrockTextEmbeddingModel(TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
client_config = Config(region_name=credentials["aws_region"]) client_config = Config(region_name=credentials["aws_region"])

View File

@ -5,6 +5,7 @@ import cohere
import numpy as np import numpy as np
from cohere.core import RequestOptions from cohere.core import RequestOptions
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# get model properties # get model properties

View File

@ -15,6 +15,7 @@ help:
en_US: https://fireworks.ai/account/api-keys en_US: https://fireworks.ai/account/api-keys
supported_model_types: supported_model_types:
- llm - llm
- text-embedding
configurate_methods: configurate_methods:
- predefined-model - predefined-model
provider_credential_schema: provider_credential_schema:

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
zh_Hans: Llama 3.2 11B Vision Instruct
en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.2'
output: '0.2'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
zh_Hans: Llama 3.2 1B Instruct
en_US: Llama 3.2 1B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
zh_Hans: Llama 3.2 90B Vision Instruct
en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,12 @@
model: WhereIsAI/UAE-Large-V1
label:
zh_Hans: UAE-Large-V1
en_US: UAE-Large-V1
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: thenlper/gte-base
label:
zh_Hans: GTE-base
en_US: GTE-base
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: thenlper/gte-large
label:
zh_Hans: GTE-large
en_US: GTE-large
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1.5
label:
zh_Hans: nomic-embed-text-v1.5
en_US: nomic-embed-text-v1.5
model_type: text-embedding
model_properties:
context_size: 8192
max_chunks: 16
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1
label:
zh_Hans: nomic-embed-text-v1
en_US: nomic-embed-text-v1
model_type: text-embedding
model_properties:
context_size: 8192
max_chunks: 16
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,151 @@
import time
from collections.abc import Mapping
from typing import Optional, Union
import numpy as np
from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
    """
    Model class for Fireworks text embedding model.

    Requests are sent through an ``OpenAI`` client built from the provider credentials.
    """

    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type (accepted to match the base signature; not used by this method)
        :return: embeddings result
        """
        credentials_kwargs = self._to_credential_kwargs(credentials)
        client = OpenAI(**credentials_kwargs)

        extra_model_kwargs = {}
        if user:
            extra_model_kwargs["user"] = user

        extra_model_kwargs["encoding_format"] = "float"

        context_size = self._get_context_size(model, credentials)
        max_chunks = self._get_max_chunks(model, credentials)

        inputs = []
        for text in texts:
            # Here token count is only an approximation based on the GPT2 tokenizer
            # TODO: Optimize for better token estimation and chunking
            num_tokens = self._get_num_tokens_by_gpt2(text)

            if num_tokens >= context_size:
                # if num tokens is larger than context length, only use the start:
                # keep a character prefix proportional to context_size / num_tokens
                cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
                inputs.append(text[0:cutoff])
            else:
                inputs.append(text)

        used_tokens = 0
        batched_embeddings = []

        # Send at most `max_chunks` texts per request and accumulate the results in order.
        for start in range(0, len(inputs), max_chunks):
            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
                model=model,
                client=client,
                texts=inputs[start : start + max_chunks],
                extra_model_kwargs=extra_model_kwargs,
            )
            used_tokens += embedding_used_tokens
            batched_embeddings += embeddings_batch

        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
        return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given texts

        The count is the sum of the per-text GPT2 tokenizer estimates.

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return: total estimated number of tokens
        """
        return sum(self._get_num_tokens_by_gpt2(text) for text in texts)

    def validate_credentials(self, model: str, credentials: Mapping) -> None:
        """
        Validate model credentials

        Sends a one-item embedding request ("ping"); any failure is reported as
        invalid credentials.

        :param model: model name
        :param credentials: model credentials
        :raises CredentialsValidateFailedError: if the embedding request fails for any reason
        :return:
        """
        try:
            # transform credentials to kwargs for model instance
            credentials_kwargs = self._to_credential_kwargs(credentials)
            client = OpenAI(**credentials_kwargs)

            # call embedding model
            self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
        except Exception as ex:
            # Chain the original exception so the root cause stays visible in tracebacks.
            raise CredentialsValidateFailedError(str(ex)) from ex

    def _embedding_invoke(
        self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
    ) -> tuple[list[list[float]], int]:
        """
        Invoke embedding model

        :param model: model name
        :param client: model client
        :param texts: texts to embed
        :param extra_model_kwargs: extra model kwargs
        :return: embeddings and used tokens
        """
        response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
        return [data.embedding for data in response.data], response.usage.total_tokens

    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage

        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        input_price_info = self.get_price(
            model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
        )

        # Latency is measured from `self.started_at`, which must be set before this is called.
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at,
        )

        return usage

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-001
label:
en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-002
label:
en_US: Gemini 1.5 Flash 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192 max: 8192
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-8b-exp-0924
label:
en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192 max: 8192
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest model: gemini-1.5-flash-latest
label: label:
en_US: Gemini 1.5 Flash en_US: Gemini 1.5 Flash Latest
model_type: llm model_type: llm
features: features:
- agent-thought - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
max: 8192 max: 8192
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash
label:
en_US: Gemini 1.5 Flash
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro-001
label:
en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro-002
label:
en_US: Gemini 1.5 Pro 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192 max: 8192
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -32,6 +32,15 @@ parameter_rules:
max: 8192 max: 8192
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest model: gemini-1.5-pro-latest
label: label:
en_US: Gemini 1.5 Pro en_US: Gemini 1.5 Pro Latest
model_type: llm model_type: llm
features: features:
- agent-thought - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
max: 8192 max: 8192
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro
label:
en_US: Gemini 1.5 Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -27,6 +27,15 @@ parameter_rules:
default: 4096 default: 4096
min: 1 min: 1
max: 4096 max: 4096
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -31,6 +31,15 @@ parameter_rules:
max: 2048 max: 2048
- name: response_format - name: response_format
use_template: response_format use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing: pricing:
input: '0.00' input: '0.00'
output: '0.00' output: '0.00'

View File

@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai import google.generativeai as genai
import requests import requests
from google.api_core import exceptions from google.api_core import exceptions
from google.generativeai import client from google.generativeai.client import _ClientManager
from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part from google.generativeai.types.content_types import to_part
from PIL import Image from PIL import Image
@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
history.append(content) history.append(content)
# Create a new ClientManager with tenant's API key # Create a new ClientManager with tenant's API key
new_client_manager = client._ClientManager() new_client_manager = _ClientManager()
new_client_manager.configure(api_key=credentials["google_api_key"]) new_client_manager.configure(api_key=credentials["google_api_key"])
new_custom_client = new_client_manager.make_client("generative") new_custom_client = new_client_manager.make_client("generative")
google_model._client = new_custom_client google_model._client = new_custom_client
safety_settings = {
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}
response = google_model.generate_content( response = google_model.generate_content(
contents=history, contents=history,
generation_config=genai.types.GenerationConfig(**config_kwargs), generation_config=genai.types.GenerationConfig(**config_kwargs),
stream=stream, stream=stream,
safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None, tools=self._convert_tools_to_glm_tool(tools) if tools else None,
request_options={"timeout": 600}, request_options={"timeout": 600},
) )

View File

@ -0,0 +1,25 @@
model: llama-3.2-11b-text-preview
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-1b-preview
label:
zh_Hans: Llama 3.2 1B Text (Preview)
en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-3b-preview
label:
zh_Hans: Llama 3.2 3B Text (Preview)
en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-90b-text-preview
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -6,6 +6,7 @@ import numpy as np
import requests import requests
from huggingface_hub import HfApi, InferenceClient from huggingface_hub import HfApi, InferenceClient
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel): class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
client = InferenceClient(token=credentials["huggingfacehub_api_token"]) client = InferenceClient(token=credentials["huggingfacehub_api_token"])
execute_model = model execute_model = model

View File

@ -1,6 +1,7 @@
import time import time
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
server_url = credentials["server_url"] server_url = credentials["server_url"]

View File

@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -67,46 +67,3 @@ model_credential_schema:
required: false required: false
type: text-input type: text-input
default: '8192' default: '8192'
- variable: task
label:
zh_Hans: 下游任务
en_US: Downstream task
placeholder:
zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。
en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task.
required: false
type: select
options:
- value: retrieval.query
label:
en_US: retrieval.query
- value: retrieval.passage
label:
en_US: retrieval.passage
- value: separation
label:
en_US: separation
- value: classification
label:
en_US: classification
- value: text-matching
label:
en_US: text-matching
- variable: dimensions
label:
zh_Hans: 输出维度
en_US: Output dimensions
placeholder:
zh_Hans: 输入您的输出维度
en_US: Enter output dimensions
required: false
type: text-input
- variable: late_chunking
label:
zh_Hans: 后期分块
en_US: Late chunking
placeholder:
zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。
en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings.
required: false
type: switch

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,7 +28,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.jina.ai/v1" api_base: str = "https://api.jina.ai/v1"
def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict: def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
""" """
Parse model credentials Parse model credentials
@ -44,23 +45,20 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]} data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
task = credentials.get("task") # model specific parameters
dimensions = credentials.get("dimensions") if model == "jina-embeddings-v3":
late_chunking = credentials.get("late_chunking") # set `task` type according to input type for the best performance
data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
if task is not None:
data["task"] = task
if dimensions is not None:
data["dimensions"] = int(dimensions)
if late_chunking is not None:
data["late_chunking"] = late_chunking
return data return data
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -69,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
api_key = credentials["api_key"] api_key = credentials["api_key"]
@ -81,7 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
url = base_url + "/embeddings" url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"} headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
data = self._to_payload(model=model, texts=texts, credentials=credentials) data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
try: try:
response = post(url, headers=headers, data=dumps(data)) response = post(url, headers=headers, data=dumps(data))

View File

@ -5,6 +5,7 @@ from typing import Optional
from requests import post from requests import post
from yarl import URL from yarl import URL
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
class LocalAITextEmbeddingModel(TextEmbeddingModel): class LocalAITextEmbeddingModel(TextEmbeddingModel):
""" """
Model class for Jina text embedding model. Model class for LocalAI text embedding model.
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
if len(texts) != 1: if len(texts) != 1:

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.minimax.chat/v1/embeddings" api_base: str = "https://api.minimax.chat/v1/embeddings"
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
api_key = credentials["minimax_api_key"] api_key = credentials["minimax_api_key"]

View File

@ -4,6 +4,7 @@ from typing import Optional
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,7 +28,12 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.mixedbread.ai/v1" api_base: str = "https://api.mixedbread.ai/v1"
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -36,6 +42,7 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
api_key = credentials["api_key"] api_key = credentials["api_key"]

View File

@ -5,6 +5,7 @@ from typing import Optional
from nomic import embed from nomic import embed
from nomic import login as nomic_login from nomic import login as nomic_login
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import ( from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage, EmbeddingUsage,
@ -46,6 +47,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
credentials: dict, credentials: dict,
texts: list[str], texts: list[str],
user: Optional[str] = None, user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -54,6 +56,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
embeddings, prompt_tokens, total_tokens = self.embed_text( embeddings, prompt_tokens, total_tokens = self.embed_text(

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
models: list[str] = ["NV-Embed-QA"] models: list[str] = ["NV-Embed-QA"]
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
api_key = credentials["api_key"] api_key = credentials["api_key"]

View File

@ -6,6 +6,7 @@ from typing import Optional
import numpy as np import numpy as np
import oci import oci
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# get model properties # get model properties

View File

@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
if chunk_json["done"]: if chunk_json["done"]:
# calculate num tokens # calculate num tokens
if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json: if "prompt_eval_count" in chunk_json:
# transform usage
prompt_tokens = chunk_json["prompt_eval_count"] prompt_tokens = chunk_json["prompt_eval_count"]
completion_tokens = chunk_json["eval_count"]
else: else:
# calculate num tokens prompt_message_content = prompt_messages[0].content
prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content) if isinstance(prompt_message_content, str):
completion_tokens = self._get_num_tokens_by_gpt2(full_text) prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
else:
content_text = ""
for message_content in prompt_message_content:
if message_content.type == PromptMessageContentType.TEXT:
message_content = cast(TextPromptMessageContent, message_content)
content_text += message_content.data
prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
# transform usage # transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)

View File

@ -8,6 +8,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -6,6 +6,7 @@ import numpy as np
import tiktoken import tiktoken
from openai import OpenAI from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# transform credentials to kwargs for model instance # transform credentials to kwargs for model instance

View File

@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -5,6 +5,7 @@ from typing import Optional
from requests import post from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
server_url = credentials["server_url"] server_url = credentials["server_url"]

View File

@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -4,6 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient from replicate import Client as ReplicateClient
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel): class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30) client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
if "model_version" in credentials: if "model_version" in credentials:

View File

@ -84,8 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
Model class for SageMaker large language model. Model class for SageMaker large language model.
""" """
sagemaker_client: Any = None sagemaker_session: Any = None
predictor: Any = None predictor: Any = None
sagemaker_endpoint: str = None
def _handle_chat_generate_response( def _handle_chat_generate_response(
self, self,
@ -211,7 +212,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
:param user: unique user id :param user: unique user id
:return: full response or stream response chunk generator result :return: full response or stream response chunk generator result
""" """
if not self.sagemaker_client: if not self.sagemaker_session:
access_key = credentials.get("aws_access_key_id") access_key = credentials.get("aws_access_key_id")
secret_key = credentials.get("aws_secret_access_key") secret_key = credentials.get("aws_secret_access_key")
aws_region = credentials.get("aws_region") aws_region = credentials.get("aws_region")
@ -226,11 +227,14 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
else: else:
boto_session = boto3.Session() boto_session = boto3.Session()
self.sagemaker_client = boto_session.client("sagemaker") sagemaker_client = boto_session.client("sagemaker")
sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client) self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
self.predictor = Predictor( self.predictor = Predictor(
endpoint_name=credentials.get("sagemaker_endpoint"), endpoint_name=self.sagemaker_endpoint,
sagemaker_session=sagemaker_session, sagemaker_session=self.sagemaker_session,
serializer=serializers.JSONSerializer(), serializer=serializers.JSONSerializer(),
) )

View File

@ -6,6 +6,7 @@ from typing import Any, Optional
import boto3 import boto3
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
return embeddings return embeddings
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# get model properties # get model properties

View File

@ -1,25 +1,38 @@
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-14B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-72B-Instruct - Qwen/Qwen2.5-72B-Instruct
- Qwen/Qwen2.5-Math-72B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-14B-Instruct
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-Coder-7B-Instruct
- deepseek-ai/DeepSeek-V2.5
- Qwen/Qwen2-72B-Instruct - Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct - Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct - Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct - Qwen/Qwen2-1.5B-Instruct
- 01-ai/Yi-1.5-34B-Chat
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- THUDM/glm-4-9b-chat
- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat - deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct - deepseek-ai/DeepSeek-Coder-V2-Instruct
- THUDM/glm-4-9b-chat
- THUDM/chatglm3-6b
- 01-ai/Yi-1.5-34B-Chat-16K
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2_5-7b-chat - internlm/internlm2_5-7b-chat
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct - meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct - meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct - meta-llama/Meta-Llama-3.1-8B-Instruct
- mistralai/Mixtral-8x7B-Instruct-v0.1 - meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2 - mistralai/Mistral-7B-Instruct-v0.2
- Pro/Qwen/Qwen2-7B-Instruct
- Pro/Qwen/Qwen2-1.5B-Instruct
- Pro/THUDM/glm-4-9b-chat
- Pro/THUDM/chatglm3-6b
- Pro/01-ai/Yi-1.5-9B-Chat-16K
- Pro/01-ai/Yi-1.5-6B-Chat
- Pro/internlm/internlm2_5-7b-chat
- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
- Pro/meta-llama/Meta-Llama-3-8B-Instruct
- Pro/google/gemma-2-9b-it

View File

@ -28,3 +28,4 @@ pricing:
output: '0' output: '0'
unit: '0.000001' unit: '0.000001'
currency: RMB currency: RMB
deprecated: true

View File

@ -28,3 +28,4 @@ pricing:
output: '1.26' output: '1.26'
unit: '0.000001' unit: '0.000001'
currency: RMB currency: RMB
deprecated: true

View File

@ -1,5 +1,6 @@
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import ( from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel, OAICompatEmbeddingModel,
@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
super().validate_credentials(model, credentials) super().validate_credentials(model, credentials)
def _invoke(
    self,
    model: str,
    credentials: dict,
    texts: list[str],
    user: Optional[str] = None,
    input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
    """
    Invoke text embedding model

    Calls ``self._add_custom_parameters(credentials)`` and then delegates to
    the parent OpenAI-API-compatible implementation.

    :param model: model name
    :param credentials: model credentials (passed to
        ``_add_custom_parameters`` before the parent call;
        NOTE(review): presumably updated in place there - confirm)
    :param texts: texts to embed
    :param user: unique user id
    :param input_type: input type, forwarded unchanged to the parent
    :return: embeddings result
    """
    self._add_custom_parameters(credentials)
    # Forward input_type explicitly: omitting it made the parent fall back to
    # its default (DOCUMENT) regardless of what the caller requested.
    return super()._invoke(model, credentials, texts, user, input_type)

View File

@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
:param prompt_messages: prompt messages :param prompt_messages: prompt messages
:return: llm response chunk generator result :return: llm response chunk generator result
""" """
completion = ""
for index, content in enumerate(client.subscribe()): for index, content in enumerate(client.subscribe()):
if isinstance(content, dict): if isinstance(content, dict):
delta = content["data"] delta = content["data"]
else: else:
delta = content delta = content
completion += delta
assistant_prompt_message = AssistantPromptMessage( assistant_prompt_message = AssistantPromptMessage(
content=delta or "", content=delta or "",
) )
temp_assistant_prompt_message = AssistantPromptMessage(
content=completion,
)
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages) prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message]) completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
# transform usage # transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: farui-plus model: farui-plus
label: label:
en_US: farui-plus en_US: farui-plus

View File

@ -18,7 +18,7 @@ from dashscope.common.error import (
UnsupportedModel, UnsupportedModel,
) )
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import ( from core.model_runtime.entities.message_entities import (
AssistantPromptMessage, AssistantPromptMessage,
ImagePromptMessageContent, ImagePromptMessageContent,
@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
FetchFrom, FetchFrom,
I18nObject, I18nObject,
ModelFeature, ModelFeature,
ModelPropertyKey,
ModelType, ModelType,
ParameterRule, ParameterRule,
ParameterType, ParameterType,
@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling :param tools: tools for tool calling
:return: :return:
""" """
# Check if the model was added via get_customizable_model_schema
if self.get_customizable_model_schema(model, credentials) is not None:
# For custom models, tokens are not calculated.
return 0
if model in {"qwen-turbo-chat", "qwen-plus-chat"}: if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
model = model.replace("-chat", "") model = model.replace("-chat", "")
if model == "farui-plus": if model == "farui-plus":
@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param credentials: model credentials :param credentials: model credentials
:return: AIModelEntity or None :return: AIModelEntity or None
""" """
rules = [ return AIModelEntity(
model=model,
label=I18nObject(en_US=model, zh_Hans=model),
model_type=ModelType.LLM,
features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
if credentials.get("function_calling_type") == "tool_call"
else [],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
ModelPropertyKey.MODE: LLMMode.CHAT.value,
},
parameter_rules=[
ParameterRule( ParameterRule(
name="temperature", name="temperature",
type=ParameterType.FLOAT,
use_template="temperature", use_template="temperature",
label=I18nObject(zh_Hans="温度", en_US="Temperature"), label=I18nObject(en_US="Temperature", zh_Hans="温度"),
),
ParameterRule(
name="top_p",
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
use_template="top_p",
label=I18nObject(zh_Hans="Top P", en_US="Top P"),
),
ParameterRule(
name="top_k",
type=ParameterType.INT,
min=0,
max=99,
label=I18nObject(zh_Hans="top_k", en_US="top_k"),
), ),
ParameterRule( ParameterRule(
name="max_tokens", name="max_tokens",
type=ParameterType.INT, use_template="max_tokens",
default=512,
min=1, min=1,
max=128000, max=int(credentials.get("max_tokens", 1024)),
default=1024, label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
),
ParameterRule(
name="seed",
type=ParameterType.INT, type=ParameterType.INT,
default=1234,
label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
), ),
ParameterRule( ParameterRule(
name="repetition_penalty", name="top_p",
use_template="top_p",
label=I18nObject(en_US="Top P", zh_Hans="Top P"),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
default=1.1,
label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
), ),
] ParameterRule(
name="top_k",
entity = AIModelEntity( use_template="top_k",
model=model, label=I18nObject(en_US="Top K", zh_Hans="Top K"),
label=I18nObject(en_US=model), type=ParameterType.FLOAT,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, ),
model_type=ModelType.LLM, ParameterRule(
model_properties={}, name="frequency_penalty",
parameter_rules=rules, use_template="frequency_penalty",
label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
type=ParameterType.FLOAT,
),
],
) )
return entity

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-0919 model: qwen-coder-turbo-0919
label: label:
en_US: qwen-coder-turbo-0919 en_US: qwen-coder-turbo-0919

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-latest model: qwen-coder-turbo-latest
label: label:
en_US: qwen-coder-turbo-latest en_US: qwen-coder-turbo-latest

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo model: qwen-coder-turbo
label: label:
en_US: qwen-coder-turbo en_US: qwen-coder-turbo

View File

@ -1,4 +1,4 @@
# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6 # for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-long model: qwen-long
label: label:
en_US: qwen-long en_US: qwen-long

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0816 model: qwen-math-plus-0816
label: label:
en_US: qwen-math-plus-0816 en_US: qwen-math-plus-0816

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0919 model: qwen-math-plus-0919
label: label:
en_US: qwen-math-plus-0919 en_US: qwen-math-plus-0919

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-latest model: qwen-math-plus-latest
label: label:
en_US: qwen-math-plus-latest en_US: qwen-math-plus-latest

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus model: qwen-math-plus
label: label:
en_US: qwen-math-plus en_US: qwen-math-plus

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-0919 model: qwen-math-turbo-0919
label: label:
en_US: qwen-math-turbo-0919 en_US: qwen-math-turbo-0919

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-latest model: qwen-math-turbo-latest
label: label:
en_US: qwen-math-turbo-latest en_US: qwen-math-turbo-latest

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo model: qwen-math-turbo
label: label:
en_US: qwen-math-turbo en_US: qwen-math-turbo

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0107 model: qwen-max-0107
label: label:
en_US: qwen-max-0107 en_US: qwen-max-0107

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max-0403, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0403 model: qwen-max-0403
label: label:
en_US: qwen-max-0403 en_US: qwen-max-0403

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max-0428, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0428 model: qwen-max-0428
label: label:
en_US: qwen-max-0428 en_US: qwen-max-0428

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max-0919, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0919 model: qwen-max-0919
label: label:
en_US: qwen-max-0919 en_US: qwen-max-0919

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-1201 model: qwen-max-1201
label: label:
en_US: qwen-max-1201 en_US: qwen-max-1201

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-latest model: qwen-max-latest
label: label:
en_US: qwen-max-latest en_US: qwen-max-latest

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-longcontext model: qwen-max-longcontext
label: label:
en_US: qwen-max-longcontext en_US: qwen-max-longcontext

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-max, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max model: qwen-max
label: label:
en_US: qwen-max en_US: qwen-max

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus-0206, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0206 model: qwen-plus-0206
label: label:
en_US: qwen-plus-0206 en_US: qwen-plus-0206

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus-0624, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0624 model: qwen-plus-0624
label: label:
en_US: qwen-plus-0624 en_US: qwen-plus-0624

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus-0723, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0723 model: qwen-plus-0723
label: label:
en_US: qwen-plus-0723 en_US: qwen-plus-0723

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus-0806, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0806 model: qwen-plus-0806
label: label:
en_US: qwen-plus-0806 en_US: qwen-plus-0806

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus-0919, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0919 model: qwen-plus-0919
label: label:
en_US: qwen-plus-0919 en_US: qwen-plus-0919

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-chat model: qwen-plus-chat
label: label:
en_US: qwen-plus-chat en_US: qwen-plus-chat

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus-latest, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-latest model: qwen-plus-latest
label: label:
en_US: qwen-plus-latest en_US: qwen-plus-latest

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-plus, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus model: qwen-plus
label: label:
en_US: qwen-plus en_US: qwen-plus

View File

@ -1,3 +1,6 @@
# this model corresponds to qwen-turbo-0206, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0206 model: qwen-turbo-0206
label: label:
en_US: qwen-turbo-0206 en_US: qwen-turbo-0206

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-turbo-0624, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0624 model: qwen-turbo-0624
label: label:
en_US: qwen-turbo-0624 en_US: qwen-turbo-0624

View File

@ -1,3 +1,5 @@
# this model corresponds to qwen-turbo-0919, for more details
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0919 model: qwen-turbo-0919
label: label:
en_US: qwen-turbo-0919 en_US: qwen-turbo-0919

Some files were not shown because too many files have changed in this diff Show More