Merge branch 'main' into tp

JzoNg 2024-09-27 16:47:57 +08:00
commit 3b48f8c98e
257 changed files with 4098 additions and 874 deletions

.github/workflows/web-tests.yml (new file)
View File

@ -0,0 +1,46 @@
name: Web Tests

on:
  pull_request:
    branches:
      - main
    paths:
      - web/**

concurrency:
  group: web-tests-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  test:
    name: Web Tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./web
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: web/**

      - name: Setup Node.js
        uses: actions/setup-node@v4
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
          node-version: 20
          cache: yarn
          cache-dependency-path: ./web/package.json

      - name: Install dependencies
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn install --frozen-lockfile

      - name: Run tests
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn test

View File

@ -53,11 +53,9 @@ from services.account_service import AccountService
 warnings.simplefilter("ignore", ResourceWarning)

-# fix windows platform
-if os.name == "nt":
-    os.system('tzutil /s "UTC"')
-else:
-    os.environ["TZ"] = "UTC"
-    time.tzset()
+os.environ["TZ"] = "UTC"
+# windows platform not support tzset
+if hasattr(time, "tzset"):
+    time.tzset()
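
Note: the new code swaps platform sniffing (os.name == "nt" plus tzutil) for feature detection. A minimal standalone sketch of the same pattern, with an illustrative helper name:

    import os
    import time

    def set_utc_timezone() -> None:
        # time.tzset() exists only on Unix; Windows just relies on the TZ
        # variable, so feature-detect instead of branching on the platform.
        os.environ["TZ"] = "UTC"
        if hasattr(time, "tzset"):
            time.tzset()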

View File

@ -652,7 +652,7 @@ where sites.id is null limit 1000"""
                 app_was_created.send(app, account=account)
             except Exception as e:
                 failed_app_ids.append(app_id)
-                click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
+                click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
                 logging.exception(f"Fix app related site missing issue failed, error: {e}")
                 continue

View File

@ -75,10 +75,10 @@ class AppGenerateResponseConverter(ABC):
         :return:
         """
         # show_retrieve_source
+        updated_resources = []
         if "retriever_resources" in metadata:
-            metadata["retriever_resources"] = []
             for resource in metadata["retriever_resources"]:
-                metadata["retriever_resources"].append(
+                updated_resources.append(
                     {
                         "segment_id": resource["segment_id"],
                         "position": resource["position"],
@ -87,6 +87,7 @@ class AppGenerateResponseConverter(ABC):
                         "content": resource["content"],
                     }
                 )
+            metadata["retriever_resources"] = updated_resources

         # show annotation reply
         if "annotation_reply" in metadata:

View File

@ -309,7 +309,7 @@ class AppRunner:
                 if not prompt_messages:
                     prompt_messages = result.prompt_messages

-                if not usage and result.delta.usage:
+                if result.delta.usage:
                     usage = result.delta.usage

             if not usage:
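
Note: dropping the `not usage` guard lets later deltas overwrite earlier ones, so the final usage reported by the stream wins instead of the first one seen. A toy illustration with invented values:

    deltas = [None, {"total_tokens": 10}, {"total_tokens": 42}]  # fake stream

    usage = None
    for delta_usage in deltas:
        if delta_usage:  # keep updating: the last reported usage wins
            usage = delta_usage

    assert usage == {"total_tokens": 42}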

View File

@ -5,6 +5,7 @@ from typing import Optional, cast
 import numpy as np
 from sqlalchemy.exc import IntegrityError

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
             for i in range(0, len(embedding_queue_texts), max_chunks):
                 batch_texts = embedding_queue_texts[i : i + max_chunks]

-                embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+                embedding_result = self._model_instance.invoke_text_embedding(
+                    texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+                )

                 for vector in embedding_result.embeddings:
                     try:
@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
             redis_client.expire(embedding_cache_key, 600)
             return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
         try:
-            embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+            embedding_result = self._model_instance.invoke_text_embedding(
+                texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+            )
             embedding_results = embedding_result.embeddings[0]
             embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
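
Note: the query path L2-normalizes the embedding before caching it. For intuition, with made-up numbers:

    import numpy as np

    vector = np.array([3.0, 4.0])
    unit = (vector / np.linalg.norm(vector)).tolist()  # [0.6, 0.8]
    assert abs(np.linalg.norm(unit) - 1.0) < 1e-9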

View File

@ -0,0 +1,10 @@
from enum import Enum


class EmbeddingInputType(Enum):
    """
    Enum for embedding input type.
    """

    DOCUMENT = "document"
    QUERY = "query"
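
Note: the enum lets call sites tell asymmetric-retrieval models whether a text is being indexed (DOCUMENT) or used to search (QUERY). A hedged sketch of the call shape, using a stand-in class rather than the real ModelInstance:

    from enum import Enum

    class EmbeddingInputType(Enum):
        DOCUMENT = "document"
        QUERY = "query"

    class FakeModelInstance:
        """Stand-in for ModelInstance, only to show the call shape."""

        def invoke_text_embedding(self, texts, user=None, input_type=EmbeddingInputType.DOCUMENT):
            return [[0.0, 0.0, 0.0] for _ in texts]  # dummy vectors

    mi = FakeModelInstance()
    mi.invoke_text_embedding(["chunk one"], user="u1", input_type=EmbeddingInputType.DOCUMENT)
    mi.invoke_text_embedding(["a question"], user="u1", input_type=EmbeddingInputType.QUERY)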

View File

@ -65,7 +65,6 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, " "Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n" "and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response" "MAKE SURE your output is the SAME language as the Assistant's latest response"
"(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
"The output must be an array in JSON format following the specified schema:\n" "The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n' '["question1","question2","question3"]\n'
) )

View File

@ -3,6 +3,7 @@ import os
 from collections.abc import Callable, Generator, Sequence
 from typing import IO, Optional, Union, cast

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
 from core.entities.provider_entities import ModelLoadBalancingConfiguration
 from core.errors.error import ProviderTokenNotInitError
@ -158,12 +159,15 @@ class ModelInstance:
             tools=tools,
         )

-    def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+    def invoke_text_embedding(
+        self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+    ) -> TextEmbeddingResult:
         """
         Invoke large language model

         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         if not isinstance(self.model_type_instance, TextEmbeddingModel):
@ -176,6 +180,7 @@ class ModelInstance:
             credentials=self.credentials,
             texts=texts,
             user=user,
+            input_type=input_type,
         )

     def get_text_embedding_num_tokens(self, texts: list[str]) -> int:

View File

@ -4,6 +4,7 @@ from typing import Optional
 from pydantic import ConfigDict

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.__base.ai_model import AIModel
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
     model_config = ConfigDict(protected_namespaces=())

     def invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
-        Invoke large language model
+        Invoke text embedding model

         :param model: model name
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         self.started_at = time.perf_counter()

         try:
-            return self._invoke(model, credentials, texts, user)
+            return self._invoke(model, credentials, texts, user, input_type)
         except Exception as e:
             raise self._transform_invoke_error(e)

     @abstractmethod
     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
-        Invoke large language model
+        Invoke text embedding model

         :param model: model name
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         raise NotImplementedError
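
Note: every provider subclass in this commit gains the same trailing keyword with the same default, so existing callers keep working unchanged. A minimal hypothetical subclass showing the contract (names invented, not a real provider):

    from enum import Enum
    from typing import Optional

    class EmbeddingInputType(Enum):  # mirrors core.embedding.embedding_constant
        DOCUMENT = "document"
        QUERY = "query"

    class ToyTextEmbeddingModel:
        """Illustrative provider honoring the new keyword."""

        def _invoke(
            self,
            model: str,
            credentials: dict,
            texts: list[str],
            user: Optional[str] = None,
            input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
        ) -> list[list[float]]:
            # A real provider would forward input_type to its API when the
            # backend supports it, and silently ignore it otherwise.
            return [[float(len(t))] for t in texts]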

View File

@ -38,3 +38,5 @@
 - perfxcloud
 - zhinao
 - fireworks
+- mixedbread
+- nomic

View File

@ -7,6 +7,7 @@ import numpy as np
 import tiktoken
 from openai import AzureOpenAI

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_

 class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
         base_model_name = credentials["base_model_name"]
         credentials_kwargs = self._to_credential_kwargs(credentials)
         client = AzureOpenAI(**credentials_kwargs)

View File

@ -4,6 +4,7 @@ from typing import Optional
 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
     api_base: str = "http://api.baichuan-ai.com/v1/embeddings"

     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         api_key = credentials["api_key"]

View File

@ -13,6 +13,7 @@ from botocore.exceptions import (
     UnknownServiceError,
 )

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)

 class BedrockTextEmbeddingModel(TextEmbeddingModel):
     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         client_config = Config(region_name=credentials["aws_region"])

View File

@ -5,6 +5,7 @@ import cohere
 import numpy as np
 from cohere.core import RequestOptions

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
     """

     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         # get model properties

View File

@ -15,6 +15,7 @@ help:
   en_US: https://fireworks.ai/account/api-keys
 supported_model_types:
   - llm
+  - text-embedding
 configurate_methods:
   - predefined-model
 provider_credential_schema:

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
label:
zh_Hans: Llama 3.2 11B Vision Instruct
en_US: Llama 3.2 11B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.2'
output: '0.2'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-1b-instruct
label:
zh_Hans: Llama 3.2 1B Instruct
en_US: Llama 3.2 1B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-3b-instruct
label:
zh_Hans: Llama 3.2 3B Instruct
en_US: Llama 3.2 3B Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.1'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,46 @@
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
label:
zh_Hans: Llama 3.2 90B Vision Instruct
en_US: Llama 3.2 90B Vision Instruct
model_type: llm
features:
- agent-thought
- tool-call
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
- name: max_tokens
use_template: max_tokens
- name: context_length_exceeded_behavior
default: None
label:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
help:
zh_Hans: 上下文长度超出行为
en_US: Context Length Exceeded Behavior
type: string
options:
- None
- truncate
- error
- name: response_format
use_template: response_format
pricing:
input: '0.9'
output: '0.9'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,12 @@
model: WhereIsAI/UAE-Large-V1
label:
zh_Hans: UAE-Large-V1
en_US: UAE-Large-V1
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: thenlper/gte-base
label:
zh_Hans: GTE-base
en_US: GTE-base
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: thenlper/gte-large
label:
zh_Hans: GTE-large
en_US: GTE-large
model_type: text-embedding
model_properties:
context_size: 512
max_chunks: 1
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1.5
label:
zh_Hans: nomic-embed-text-v1.5
en_US: nomic-embed-text-v1.5
model_type: text-embedding
model_properties:
context_size: 8192
max_chunks: 16
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,12 @@
model: nomic-ai/nomic-embed-text-v1
label:
zh_Hans: nomic-embed-text-v1
en_US: nomic-embed-text-v1
model_type: text-embedding
model_properties:
context_size: 8192
max_chunks: 16
pricing:
input: '0.008'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,151 @@
import time
from collections.abc import Mapping
from typing import Optional, Union
import numpy as np
from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
"""
Model class for Fireworks text embedding model.
"""
def _invoke(
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
extra_model_kwargs = {}
if user:
extra_model_kwargs["user"] = user
extra_model_kwargs["encoding_format"] = "float"
context_size = self._get_context_size(model, credentials)
max_chunks = self._get_max_chunks(model, credentials)
inputs = []
indices = []
used_tokens = 0
for i, text in enumerate(texts):
# Here token count is only an approximation based on the GPT2 tokenizer
# TODO: Optimize for better token estimation and chunking
num_tokens = self._get_num_tokens_by_gpt2(text)
if num_tokens >= context_size:
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
# if num tokens is larger than context length, only use the start
inputs.append(text[0:cutoff])
else:
inputs.append(text)
indices += [i]
batched_embeddings = []
_iter = range(0, len(inputs), max_chunks)
for i in _iter:
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
client=client,
texts=inputs[i : i + max_chunks],
extra_model_kwargs=extra_model_kwargs,
)
used_tokens += embedding_used_tokens
batched_embeddings += embeddings_batch
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
def validate_credentials(self, model: str, credentials: Mapping) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
# transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
# call embedding model
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
def _embedding_invoke(
self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
) -> tuple[list[list[float]], int]:
"""
Invoke embedding model
:param model: model name
:param client: model client
:param texts: texts to embed
:param extra_model_kwargs: extra model kwargs
:return: embeddings and used tokens
"""
response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
return [data.embedding for data in response.data], response.usage.total_tokens
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: input tokens
:return: usage
"""
input_price_info = self.get_price(
model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
)
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at,
)
return usage
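
Note: the truncation above keeps roughly the leading fraction of characters that fits the context window, scaling character length by the token budget. A worked sketch of the cutoff arithmetic with invented numbers:

    import numpy as np

    context_size = 8192
    text = "x" * 20000
    num_tokens = 10000  # pretend GPT-2 tokenization yields this

    if num_tokens >= context_size:
        cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
        text = text[:cutoff]

    assert len(text) == 16384  # 20000 * 8192 / 10000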

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-001
label:
en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-002
label:
en_US: Gemini 1.5 Flash 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash-8b-exp-0924
label:
en_US: Gemini 1.5 Flash 8B 0924
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -1,6 +1,6 @@
 model: gemini-1.5-flash-latest
 label:
-  en_US: Gemini 1.5 Flash
+  en_US: Gemini 1.5 Flash Latest
 model_type: llm
 features:
   - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-flash
label:
en_US: Gemini 1.5 Flash
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro-001
label:
en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro-002
label:
en_US: Gemini 1.5 Pro 002
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -32,6 +32,15 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -32,6 +32,15 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -1,6 +1,6 @@
 model: gemini-1.5-pro-latest
 label:
-  en_US: Gemini 1.5 Pro
+  en_US: Gemini 1.5 Pro Latest
 model_type: llm
 features:
   - agent-thought
@ -32,6 +32,15 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -0,0 +1,48 @@
model: gemini-1.5-pro
label:
en_US: Gemini 1.5 Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 2097152
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens_to_sample
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
- name: stream
label:
zh_Hans: 流式输出
en_US: Stream
type: boolean
help:
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
default: false
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@ -27,6 +27,15 @@ parameter_rules:
     default: 4096
     min: 1
     max: 4096
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -31,6 +31,15 @@ parameter_rules:
     max: 2048
   - name: response_format
     use_template: response_format
+  - name: stream
+    label:
+      zh_Hans: 流式输出
+      en_US: Stream
+    type: boolean
+    help:
+      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+    default: false
 pricing:
   input: '0.00'
   output: '0.00'

View File

@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
 import google.generativeai as genai
 import requests
 from google.api_core import exceptions
-from google.generativeai import client
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.client import _ClientManager
+from google.generativeai.types import ContentType, GenerateContentResponse
 from google.generativeai.types.content_types import to_part
 from PIL import Image
@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
             history.append(content)

         # Create a new ClientManager with tenant's API key
-        new_client_manager = client._ClientManager()
+        new_client_manager = _ClientManager()
         new_client_manager.configure(api_key=credentials["google_api_key"])
         new_custom_client = new_client_manager.make_client("generative")

         google_model._client = new_custom_client

-        safety_settings = {
-            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
-        }
-
         response = google_model.generate_content(
             contents=history,
             generation_config=genai.types.GenerationConfig(**config_kwargs),
             stream=stream,
-            safety_settings=safety_settings,
             tools=self._convert_tools_to_glm_tool(tools) if tools else None,
             request_options={"timeout": 600},
         )

View File

@ -0,0 +1,25 @@
model: llama-3.2-11b-text-preview
label:
zh_Hans: Llama 3.2 11B Text (Preview)
en_US: Llama 3.2 11B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-1b-preview
label:
zh_Hans: Llama 3.2 1B Text (Preview)
en_US: Llama 3.2 1B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-3b-preview
label:
zh_Hans: Llama 3.2 3B Text (Preview)
en_US: Llama 3.2 3B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -0,0 +1,25 @@
model: llama-3.2-90b-text-preview
label:
zh_Hans: Llama 3.2 90B Text (Preview)
en_US: Llama 3.2 90B Text (Preview)
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@ -6,6 +6,7 @@ import numpy as np
 import requests
 from huggingface_hub import HfApi, InferenceClient

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/

 class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :param input_type: input type
+        :return: embeddings result
+        """
         client = InferenceClient(token=credentials["huggingfacehub_api_token"])

         execute_model = model

View File

@ -1,6 +1,7 @@
 import time
 from typing import Optional

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
     """

     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         server_url = credentials["server_url"]

View File

@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
 from tencentcloud.common.profile.http_profile import HttpProfile
 from tencentcloud.hunyuan.v20230901 import hunyuan_client, models

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
     """

     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """

View File

@ -1,6 +1,6 @@
 provider: jina
 label:
-  en_US: Jina
+  en_US: Jina AI
 description:
   en_US: Embedding and Rerank Model Supported
 icon_small:
@ -11,7 +11,7 @@ background: "#EFFDFD"
 help:
   title:
     en_US: Get your API key from Jina AI
-    zh_Hans: 从 Jina 获取 API Key
+    zh_Hans: 从 Jina AI 获取 API Key
   url:
     en_US: https://jina.ai/
 supported_model_types:

View File

@ -4,6 +4,7 @@ from typing import Optional
 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -27,8 +28,37 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
     api_base: str = "https://api.jina.ai/v1"

+    def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
+        """
+        Build the request payload for the embeddings endpoint.
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param input_type: input type
+        :return: request payload
+        """
+
+        def transform_jina_input_text(model, text):
+            if model == "jina-clip-v1":
+                return {"text": text}
+            return text
+
+        data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
+
+        # model specific parameters
+        if model == "jina-embeddings-v3":
+            # set `task` type according to input type for the best performance
+            data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
+
+        return data
+
     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -37,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         api_key = credentials["api_key"]
@ -49,15 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
         url = base_url + "/embeddings"
         headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}

-        def transform_jina_input_text(model, text):
-            if model == "jina-clip-v1":
-                return {"text": text}
-            return text
-
-        data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
-
-        if model == "jina-embeddings-v3":
-            data["task"] = "text-matching"
+        data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)

         try:
             response = post(url, headers=headers, data=dumps(data))
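
Note: besides the refactor, this changes behavior for jina-embeddings-v3: the task now switches between retrieval.query and retrieval.passage based on input_type instead of the fixed text-matching. A simplified, runnable restatement of the payload logic:

    def to_payload(model: str, texts: list[str], is_query: bool) -> dict:
        # Simplified restatement of _to_payload, for illustration only.
        data = {"model": model, "input": list(texts)}
        if model == "jina-embeddings-v3":
            data["task"] = "retrieval.query" if is_query else "retrieval.passage"
        return data

    assert to_payload("jina-embeddings-v3", ["q"], True)["task"] == "retrieval.query"
    assert to_payload("jina-embeddings-v3", ["d"], False)["task"] == "retrieval.passage"
    assert "task" not in to_payload("jina-clip-v1", ["d"], False)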

View File

@ -5,6 +5,7 @@ from typing import Optional
 from requests import post
 from yarl import URL

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE

 class LocalAITextEmbeddingModel(TextEmbeddingModel):
     """
-    Model class for Jina text embedding model.
+    Model class for LocalAI text embedding model.
     """

     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         if len(texts) != 1:
if len(texts) != 1: if len(texts) != 1:

View File

@ -4,6 +4,7 @@ from typing import Optional
 from requests import post

+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
     api_base: str = "https://api.minimax.chat/v1/embeddings"

     def _invoke(
-        self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+        self,
+        model: str,
+        credentials: dict,
+        texts: list[str],
+        user: Optional[str] = None,
+        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
         """
         Invoke text embedding model
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
         :param credentials: model credentials
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: input type
         :return: embeddings result
         """
         api_key = credentials["minimax_api_key"]

Binary file added (image, 121 KiB)

Binary file added (image, 36 KiB)

View File

@ -0,0 +1,27 @@
import logging
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class MixedBreadProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
"""
Validate provider credentials
if validation fails, raise an exception
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
"""
try:
model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
# Use the `mxbai-embed-large-v1` model for validation.
model_instance.validate_credentials(model="mxbai-embed-large-v1", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:
logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
raise ex

View File

@ -0,0 +1,31 @@
provider: mixedbread
label:
en_US: MixedBread
description:
en_US: Embedding and Rerank Model Supported
icon_small:
en_US: icon_s_en.png
icon_large:
en_US: icon_l_en.png
background: "#EFFDFD"
help:
title:
en_US: Get your API key from MixedBread AI
zh_Hans: 从 MixedBread 获取 API Key
url:
en_US: https://www.mixedbread.ai/
supported_model_types:
- text-embedding
- rerank
configurate_methods:
- predefined-model
provider_credential_schema:
credential_form_schemas:
- variable: api_key
label:
en_US: API Key
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key

View File

@ -0,0 +1,4 @@
model: mxbai-rerank-large-v1
model_type: rerank
model_properties:
context_size: 512

View File

@ -0,0 +1,125 @@
from typing import Optional
import httpx
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
class MixedBreadRerankModel(RerankModel):
"""
Model class for MixedBread rerank model.
"""
def _invoke(
self,
model: str,
credentials: dict,
query: str,
docs: list[str],
score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None,
) -> RerankResult:
"""
Invoke rerank model
:param model: model name
:param credentials: model credentials
:param query: search query
:param docs: docs for reranking
:param score_threshold: score threshold
:param top_n: top n documents to return
:param user: unique user id
:return: rerank result
"""
if len(docs) == 0:
return RerankResult(model=model, docs=[])
base_url = credentials.get("base_url", "https://api.mixedbread.ai/v1")
base_url = base_url.removesuffix("/")
try:
response = httpx.post(
base_url + "/reranking",
json={"model": model, "query": query, "input": docs, "top_k": top_n, "return_input": True},
headers={"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"},
)
response.raise_for_status()
results = response.json()
rerank_documents = []
for result in results["data"]:
rerank_document = RerankDocument(
index=result["index"],
text=result["input"],
score=result["score"],
)
if score_threshold is None or result["score"] >= score_threshold:
rerank_documents.append(rerank_document)
return RerankResult(model=model, docs=rerank_documents)
except httpx.HTTPStatusError as e:
raise InvokeServerUnavailableError(str(e))
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self._invoke(
model=model,
credentials=credentials,
query="What is the capital of the United States?",
docs=[
"Carson City is the capital city of the American state of Nevada. At the 2010 United States "
"Census, Carson City had a population of 55,274.",
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
"are a political division controlled by the United States. Its capital is Saipan.",
],
score_threshold=0.8,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
"""
return {
InvokeConnectionError: [httpx.ConnectError],
InvokeServerUnavailableError: [httpx.RemoteProtocolError],
InvokeRateLimitError: [],
InvokeAuthorizationError: [httpx.HTTPStatusError],
InvokeBadRequestError: [httpx.RequestError],
}
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
"""
generate custom model entities from credentials
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.RERANK,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
)
return entity
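A minimal usage sketch for the new rerank model, assuming the base RerankModel exposes a public invoke with the same signature as _invoke; the class and field names come from the file above, while the credentials and documents are placeholders:

model = MixedBreadRerankModel()
result = model.invoke(
    model="mxbai-rerank-large-v1",
    credentials={"api_key": "mxb-..."},  # placeholder key
    query="capital of the United States",
    docs=[
        "Carson City is the capital city of Nevada.",
        "Washington, D.C. is the capital of the United States.",
    ],
    score_threshold=0.5,
    top_n=2,
)
for doc in result.docs:
    print(doc.index, round(doc.score, 3), doc.text)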

View File

@ -0,0 +1,8 @@
model: mxbai-embed-2d-large-v1
model_type: text-embedding
model_properties:
context_size: 512
pricing:
input: '0.0001'
unit: '0.001'
currency: USD
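Assuming Dify's usual pricing semantics, where unit scales the raw token count before the per-unit input price is applied, the cost of embedding 10,000 tokens under this file works out to:

total_price = tokens × unit × input = 10,000 × 0.001 × 0.0001 = 0.001 USD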

View File

@ -0,0 +1,8 @@
model: mxbai-embed-large-v1
model_type: text-embedding
model_properties:
context_size: 512
pricing:
input: '0.0001'
unit: '0.001'
currency: USD

View File

@ -0,0 +1,170 @@
import time
from json import JSONDecodeError, dumps
from typing import Optional
import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
"""
Model class for MixedBread text embedding model.
"""
api_base: str = "https://api.mixedbread.ai/v1"
def _invoke(
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
api_key = credentials.get("api_key")
if not api_key:
raise CredentialsValidateFailedError("api_key is required")
base_url = credentials.get("base_url", self.api_base)
base_url = base_url.removesuffix("/")
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
data = {"model": model, "input": texts}
try:
response = requests.post(url, headers=headers, data=dumps(data))
except Exception as e:
raise InvokeConnectionError(str(e))
if response.status_code != 200:
try:
resp = response.json()
msg = resp["detail"]
if response.status_code == 401:
raise InvokeAuthorizationError(msg)
elif response.status_code == 429:
raise InvokeRateLimitError(msg)
elif response.status_code == 500:
raise InvokeServerUnavailableError(msg)
else:
raise InvokeBadRequestError(msg)
except JSONDecodeError as e:
raise InvokeServerUnavailableError(
f"Failed to convert response to json: {e} with text: {response.text}"
)
try:
resp = response.json()
embeddings = resp["data"]
usage = resp["usage"]
except Exception as e:
raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
result = TextEmbeddingResult(
model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
)
return result
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self._invoke(model=model, credentials=credentials, texts=["ping"])
except Exception as e:
raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
InvokeRateLimitError: [InvokeRateLimitError],
InvokeAuthorizationError: [InvokeAuthorizationError],
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
}
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: input tokens
:return: usage
"""
# get input price info
input_price_info = self.get_price(
model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
)
# transform usage
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at,
)
return usage
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
"""
generate custom model entities from credentials
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.TEXT_EMBEDDING,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
)
return entity

View File

@ -0,0 +1,13 @@
<svg width="93" height="31" viewBox="0 0 93 31" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M89.6037 29.888C88.9464 29.888 88.3667 29.7302 87.8647 29.4145C87.3626 29.0936 86.9719 28.6407 86.6924 28.0559L87.2979 27.4037C87.5464 27.9109 87.8698 28.3069 88.2684 28.5915C88.6669 28.871 89.1094 29.0108 89.5959 29.0108C89.922 29.0108 90.2196 28.9435 90.4887 28.8089C90.763 28.6744 90.9804 28.4829 91.1408 28.2344C91.3064 27.9808 91.3892 27.6806 91.3892 27.3339C91.3892 27.0182 91.3116 26.7697 91.1563 26.5886C91.0062 26.4074 90.7837 26.2522 90.4887 26.1228C90.1988 25.9882 89.8366 25.8381 89.4018 25.6725C89.0654 25.5379 88.7393 25.3853 88.4236 25.2145C88.1079 25.0437 87.8465 24.8289 87.6395 24.5701C87.4377 24.3061 87.3367 23.9723 87.3367 23.5686C87.3367 23.1598 87.4454 22.7975 87.6628 22.4817C87.8802 22.1609 88.1804 21.9098 88.5634 21.7287C88.9464 21.5424 89.3811 21.4492 89.8676 21.4492C90.3127 21.4492 90.7293 21.545 91.1175 21.7365C91.5109 21.928 91.8628 22.1997 92.1733 22.5516L91.6532 23.2115C91.177 22.5853 90.5844 22.2721 89.8754 22.2721C89.4406 22.2721 89.0861 22.386 88.8118 22.6137C88.5427 22.8415 88.4081 23.1391 88.4081 23.5065C88.4081 23.7705 88.4935 23.9904 88.6643 24.1664C88.8351 24.3424 89.0576 24.4925 89.3319 24.6167C89.6114 24.7409 89.9116 24.8651 90.2325 24.9893C90.6983 25.1653 91.102 25.3413 91.4436 25.5172C91.7903 25.6932 92.0595 25.9183 92.251 26.1927C92.4425 26.4618 92.5382 26.8293 92.5382 27.2951C92.5382 27.8281 92.414 28.2888 92.1656 28.6769C91.9171 29.0651 91.5704 29.3653 91.1253 29.5775C90.6854 29.7845 90.1781 29.888 89.6037 29.888Z" fill="#3C593D"/>
<path d="M79.8324 29.8841C79.0871 29.8841 78.4143 29.7029 77.8139 29.3406C77.2187 28.9732 76.7451 28.4711 76.3932 27.8345C76.0464 27.1979 75.873 26.4708 75.873 25.653C75.873 24.8456 76.0438 24.1262 76.3854 23.4948C76.7322 22.8582 77.2032 22.3562 77.7984 21.9887C78.3987 21.6212 79.0767 21.4375 79.8324 21.4375C80.5518 21.4375 81.2039 21.6057 81.7888 21.9421C82.3736 22.2785 82.8187 22.7443 83.1241 23.3395V21.6859H84.2575V29.6356H83.1241V27.9587C82.7825 28.5591 82.3244 29.0301 81.7499 29.3717C81.1754 29.7133 80.5363 29.8841 79.8324 29.8841ZM80.1119 28.8981C80.7071 28.8981 81.2324 28.761 81.6878 28.4867C82.1485 28.2072 82.5107 27.8242 82.7747 27.3377C83.0387 26.846 83.1706 26.287 83.1706 25.6608C83.1706 25.0294 83.0387 24.4704 82.7747 23.9839C82.5159 23.4974 82.1562 23.117 81.6956 22.8427C81.235 22.5632 80.7071 22.4235 80.1119 22.4235C79.5167 22.4235 78.9888 22.5632 78.5281 22.8427C78.0675 23.117 77.7052 23.4974 77.4413 23.9839C77.1773 24.4704 77.0453 25.0294 77.0453 25.6608C77.0453 26.287 77.1773 26.846 77.4413 27.3377C77.7052 27.8242 78.0675 28.2072 78.5281 28.4867C78.9888 28.761 79.5167 28.8981 80.1119 28.8981Z" fill="#3C593D"/>
<path d="M71.9658 29.6382V16.2852H73.0993V29.6382H71.9658Z" fill="#3C593D"/>
<path d="M68.1539 29.8864C67.5587 29.8864 67.0955 29.6871 66.7643 29.2886C66.4382 28.8849 66.2752 28.3182 66.2752 27.5884V22.5422H65.4678V21.6882H66.2752V18.7148H67.4086V21.6882H69.3883V22.5422H67.4086V27.5263C67.4086 27.9662 67.494 28.3026 67.6648 28.5355C67.8356 28.7684 68.0789 28.8849 68.3946 28.8849C68.6999 28.8849 68.9691 28.7995 69.202 28.6287L69.4892 29.5292C69.3132 29.6379 69.1062 29.7233 68.8681 29.7854C68.6301 29.8527 68.392 29.8864 68.1539 29.8864Z" fill="#3C593D"/>
<path d="M58.513 29.8841C57.7678 29.8841 57.0949 29.7029 56.4946 29.3406C55.8994 28.9732 55.4258 28.4711 55.0739 27.8345C54.7271 27.1979 54.5537 26.4708 54.5537 25.653C54.5537 24.8456 54.7245 24.1262 55.0661 23.4948C55.4129 22.8582 55.8838 22.3562 56.479 21.9887C57.0794 21.6212 57.7574 21.4375 58.513 21.4375C59.2324 21.4375 59.8846 21.6057 60.4694 21.9421C61.0543 22.2785 61.4994 22.7443 61.8047 23.3395V21.6859H62.9382V29.6356H61.8047V27.9587C61.4631 28.5591 61.0051 29.0301 60.4306 29.3717C59.8561 29.7133 59.2169 29.8841 58.513 29.8841ZM58.7925 28.8981C59.3877 28.8981 59.913 28.761 60.3685 28.4867C60.8291 28.2072 61.1914 27.8242 61.4554 27.3377C61.7193 26.846 61.8513 26.287 61.8513 25.6608C61.8513 25.0294 61.7193 24.4704 61.4554 23.9839C61.1966 23.4974 60.8369 23.117 60.3763 22.8427C59.9156 22.5632 59.3877 22.4235 58.7925 22.4235C58.1973 22.4235 57.6694 22.5632 57.2088 22.8427C56.7482 23.117 56.3859 23.4974 56.1219 23.9839C55.858 24.4704 55.726 25.0294 55.726 25.6608C55.726 26.287 55.858 26.846 56.1219 27.3377C56.3859 27.8242 56.7482 28.2072 57.2088 28.4867C57.6694 28.761 58.1973 28.8981 58.7925 28.8981Z" fill="#3C593D"/>
<path d="M5.41228 22.6607V0H6.76535V30.2143H5.41228L1.35307 7.55357V30.2143H0V0H1.35307L5.41228 22.6607Z" fill="#3C593D"/>
<path d="M13.6575 28.9006C14.024 28.9006 14.3341 28.7775 14.5878 28.5312C14.8697 28.2848 15.0106 27.9701 15.0106 27.587V2.62733C15.0106 2.27154 14.8697 1.9705 14.5878 1.72418C14.3341 1.4505 14.024 1.31366 13.6575 1.31366C13.2629 1.31366 12.9387 1.4505 12.685 1.72418C12.4313 1.9705 12.3045 2.27154 12.3045 2.62733V27.587C12.3045 27.9701 12.4313 28.2848 12.685 28.5312C12.9387 28.7775 13.2629 28.9006 13.6575 28.9006ZM13.6575 30.2143C12.8964 30.2143 12.2481 29.968 11.7125 29.4753C11.2051 28.9554 10.9514 28.3259 10.9514 27.587V2.62733C10.9514 1.91576 11.2051 1.29998 11.7125 0.779988C12.2481 0.259996 12.8964 0 13.6575 0C14.3905 0 15.0247 0.259996 15.5603 0.779988C16.0959 1.29998 16.3637 1.91576 16.3637 2.62733V27.587C16.3637 28.3259 16.0959 28.9554 15.5603 29.4753C15.0247 29.968 14.3905 30.2143 13.6575 30.2143Z" fill="#3C593D"/>
<path d="M28.3299 0H29.683V30.2143H28.3299V5.25466L24.9472 18.3913L21.5645 5.25466V30.2143H20.2115V0H21.5645L24.9472 13.1366L28.3299 0Z" fill="#3C593D"/>
<path d="M33.6999 30.2143V0H35.0529V30.2143H33.6999Z" fill="#3C593D"/>
<path d="M41.776 30.2143C41.0149 30.2143 40.3666 29.968 39.831 29.4753C39.3236 28.9554 39.0699 28.3259 39.0699 27.587V2.62733C39.0699 1.91576 39.3236 1.29998 39.831 0.779988C40.3666 0.259996 41.0149 0 41.776 0C42.5089 0 43.1432 0.259996 43.6788 0.779988C44.2143 1.29998 44.4821 1.91576 44.4821 2.62733V5.25466H43.1291V2.62733C43.1291 2.27154 42.9881 1.9705 42.7062 1.72418C42.4525 1.4505 42.1425 1.31366 41.776 1.31366C41.3814 1.31366 41.0572 1.4505 40.8035 1.72418C40.5498 1.9705 40.4229 2.27154 40.4229 2.62733V27.587C40.4229 27.9701 40.5498 28.2848 40.8035 28.5312C41.0572 28.7775 41.3814 28.9006 41.776 28.9006C42.1425 28.9006 42.4525 28.7775 42.7062 28.5312C42.9881 28.2848 43.1291 27.9701 43.1291 27.587V24.9596H44.4821V27.587C44.4821 28.3259 44.2143 28.9554 43.6788 29.4753C43.1432 29.968 42.5089 30.2143 41.776 30.2143Z" fill="#3C593D"/>
<path d="M56 1H91" stroke="#3C593D" stroke-linecap="round" stroke-dasharray="0.1 2"/>
</svg>

(SVG icon file added, 6.4 KiB; source shown above.)

(Binary image file added, 25 KiB; not shown.)

View File

@ -0,0 +1,28 @@
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
class _CommonNomic:
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
return {
InvokeConnectionError: [InvokeConnectionError],
InvokeServerUnavailableError: [InvokeServerUnavailableError],
InvokeRateLimitError: [InvokeRateLimitError],
InvokeAuthorizationError: [InvokeAuthorizationError],
InvokeBadRequestError: [KeyError, InvokeBadRequestError],
}

View File

@ -0,0 +1,26 @@
import logging
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class NomicAtlasProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
"""
Validate provider credentials
if validate failed, raise exception
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
"""
try:
model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
model_instance.validate_credentials(model="nomic-embed-text-v1.5", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:
logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
raise ex

View File

@ -0,0 +1,29 @@
provider: nomic
label:
zh_Hans: Nomic Atlas
en_US: Nomic Atlas
icon_small:
en_US: icon_s_en.png
icon_large:
en_US: icon_l_en.svg
background: "#EFF1FE"
help:
title:
en_US: Get your API key from Nomic Atlas
zh_Hans: 从Nomic Atlas获取 API Key
url:
en_US: https://atlas.nomic.ai/data
supported_model_types:
- text-embedding
configurate_methods:
- predefined-model
provider_credential_schema:
credential_form_schemas:
- variable: nomic_api_key
label:
en_US: API Key
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key

View File

@ -0,0 +1,8 @@
model: nomic-embed-text-v1.5
model_type: text-embedding
model_properties:
context_size: 8192
pricing:
input: "0.1"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,8 @@
model: nomic-embed-text-v1
model_type: text-embedding
model_properties:
context_size: 8192
pricing:
input: "0.1"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,165 @@
import time
from functools import wraps
from typing import Optional
from nomic import embed
from nomic import login as nomic_login
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage,
TextEmbeddingResult,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import (
TextEmbeddingModel,
)
from core.model_runtime.model_providers.nomic._common import _CommonNomic
def nomic_login_required(func):
@wraps(func)
def wrapper(*args, **kwargs):
try:
if not kwargs.get("credentials"):
raise ValueError("missing credentials parameters")
credentials = kwargs.get("credentials")
if "nomic_api_key" not in credentials:
raise ValueError("missing nomic_api_key in credentials parameters")
# nomic login
nomic_login(credentials["nomic_api_key"])
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
return func(*args, **kwargs)
return wrapper
class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
"""
Model class for nomic text embedding model.
"""
def _invoke(
self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
embeddings, prompt_tokens, total_tokens = self.embed_text(
model=model,
credentials=credentials,
texts=texts,
)
# calc usage
usage = self._calc_response_usage(
model=model, credentials=credentials, tokens=prompt_tokens, total_tokens=total_tokens
)
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
# call embedding model
self.embed_text(model=model, credentials=credentials, texts=["ping"])
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@nomic_login_required
def embed_text(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int, int]:
"""Call out to Nomic's embedding endpoint.
Args:
model: The model to use for embedding.
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text, and tokens usage.
"""
embeddings: list[list[float]] = []
prompt_tokens = 0
total_tokens = 0
response = embed.text(
model=model,
texts=texts,
)
if not (response and "embeddings" in response):
raise ValueError("Embedding data is missing in the response.")
if not (response and "usage" in response):
raise ValueError("Response usage is missing.")
if "prompt_tokens" not in response["usage"]:
raise ValueError("Response usage does not contain prompt tokens.")
if "total_tokens" not in response["usage"]:
raise ValueError("Response usage does not contain total tokens.")
embeddings = [list(map(float, e)) for e in response["embeddings"]]
total_tokens = response["usage"]["total_tokens"]
prompt_tokens = response["usage"]["prompt_tokens"]
return embeddings, prompt_tokens, total_tokens
def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: prompt tokens
:param total_tokens: total tokens
:return: usage
"""
# get input price info
input_price_info = self.get_price(
model=model,
credentials=credentials,
price_type=PriceType.INPUT,
tokens=tokens,
)
# transform usage
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=total_tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at,
)
return usage
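A minimal sketch of calling the embedding helper directly; note that credentials must be passed by keyword, because the nomic_login_required decorator looks it up via kwargs.get("credentials"). The API key is a placeholder:

model = NomicTextEmbeddingModel()
vectors, prompt_tokens, total_tokens = model.embed_text(
    model="nomic-embed-text-v1.5",
    credentials={"nomic_api_key": "nk-..."},  # placeholder key
    texts=["hello world"],
)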

View File

@ -4,6 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
models: list[str] = ["NV-Embed-QA"] models: list[str] = ["NV-Embed-QA"]
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
api_key = credentials["api_key"] api_key = credentials["api_key"]

View File

@ -6,6 +6,7 @@ from typing import Optional
import numpy as np import numpy as np
import oci import oci
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# get model properties # get model properties

View File

@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
if chunk_json["done"]: if chunk_json["done"]:
# calculate num tokens # calculate num tokens
if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json: if "prompt_eval_count" in chunk_json:
# transform usage
prompt_tokens = chunk_json["prompt_eval_count"] prompt_tokens = chunk_json["prompt_eval_count"]
completion_tokens = chunk_json["eval_count"]
else: else:
# calculate num tokens prompt_message_content = prompt_messages[0].content
prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content) if isinstance(prompt_message_content, str):
completion_tokens = self._get_num_tokens_by_gpt2(full_text) prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
else:
content_text = ""
for message_content in prompt_message_content:
if message_content.type == PromptMessageContentType.TEXT:
message_content = cast(TextPromptMessageContent, message_content)
content_text += message_content.data
prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
# transform usage # transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
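The reworked branch tolerates responses in which Ollama omits one or both counters (prompt_eval_count can be absent, e.g. when the prompt is served from cache). A condensed sketch of the resulting fallback, approximate because the real hunk also walks multimodal prompt content to build content_text:

if "prompt_eval_count" in chunk_json:
    prompt_tokens = chunk_json["prompt_eval_count"]
else:
    prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))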

View File

@ -8,6 +8,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -6,6 +6,7 @@ import numpy as np
import tiktoken import tiktoken
from openai import OpenAI from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError
@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# transform credentials to kwargs for model instance # transform credentials to kwargs for model instance

View File

@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -5,6 +5,7 @@ from typing import Optional
from requests import post from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (
@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
server_url = credentials["server_url"] server_url = credentials["server_url"]

View File

@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
""" """
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """

View File

@ -4,6 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient from replicate import Client as ReplicateClient
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel): class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30) client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
if "model_version" in credentials: if "model_version" in credentials:

View File

@ -84,9 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
Model class for Cohere large language model. Model class for Cohere large language model.
""" """
sagemaker_client: Any = None sagemaker_session: Any = None
sagemaker_sess: Any = None
predictor: Any = None predictor: Any = None
sagemaker_endpoint: str = None
def _handle_chat_generate_response( def _handle_chat_generate_response(
self, self,
@ -212,27 +212,29 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
:param user: unique user id :param user: unique user id
:return: full response or stream response chunk generator result :return: full response or stream response chunk generator result
""" """
if not self.sagemaker_client: if not self.sagemaker_session:
access_key = credentials.get("access_key") access_key = credentials.get("aws_access_key_id")
secret_key = credentials.get("secret_key") secret_key = credentials.get("aws_secret_access_key")
aws_region = credentials.get("aws_region") aws_region = credentials.get("aws_region")
boto_session = None
if aws_region: if aws_region:
if access_key and secret_key: if access_key and secret_key:
self.sagemaker_client = boto3.client( boto_session = boto3.Session(
"sagemaker-runtime", aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=aws_region
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
region_name=aws_region,
) )
else: else:
self.sagemaker_client = boto3.client("sagemaker-runtime", region_name=aws_region) boto_session = boto3.Session(region_name=aws_region)
else: else:
self.sagemaker_client = boto3.client("sagemaker-runtime") boto_session = boto3.Session()
sagemaker_session = Session(sagemaker_runtime_client=self.sagemaker_client) sagemaker_client = boto_session.client("sagemaker")
self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
self.predictor = Predictor( self.predictor = Predictor(
endpoint_name=credentials.get("sagemaker_endpoint"), endpoint_name=self.sagemaker_endpoint,
sagemaker_session=sagemaker_session, sagemaker_session=self.sagemaker_session,
serializer=serializers.JSONSerializer(), serializer=serializers.JSONSerializer(),
) )
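The change above swaps a raw sagemaker-runtime client for a cached SageMaker Session plus a Predictor that is rebuilt only when the configured endpoint changes, which avoids re-creating AWS clients on every call. A minimal, self-contained sketch of the new wiring (region and endpoint are placeholders):

import boto3
from sagemaker import serializers
from sagemaker.predictor import Predictor
from sagemaker.session import Session

boto_session = boto3.Session(region_name="us-east-1")  # or keys + region from credentials
sagemaker_client = boto_session.client("sagemaker")
sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
predictor = Predictor(
    endpoint_name="my-llm-endpoint",  # placeholder
    sagemaker_session=sagemaker_session,
    serializer=serializers.JSONSerializer(),
)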

View File

@ -6,6 +6,7 @@ from typing import Any, Optional
import boto3 import boto3
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
return embeddings return embeddings
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
""" """
Invoke text embedding model Invoke text embedding model
@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials :param credentials: model credentials
:param texts: texts to embed :param texts: texts to embed
:param user: unique user id :param user: unique user id
:param input_type: input type
:return: embeddings result :return: embeddings result
""" """
# get model properties # get model properties

View File

@ -1,25 +1,38 @@
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-14B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-72B-Instruct - Qwen/Qwen2.5-72B-Instruct
- Qwen/Qwen2.5-Math-72B-Instruct
- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-14B-Instruct
- Qwen/Qwen2.5-7B-Instruct
- Qwen/Qwen2.5-Coder-7B-Instruct
- deepseek-ai/DeepSeek-V2.5
- Qwen/Qwen2-72B-Instruct - Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct - Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct - Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct - Qwen/Qwen2-1.5B-Instruct
- 01-ai/Yi-1.5-34B-Chat
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- THUDM/glm-4-9b-chat
- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat - deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct - deepseek-ai/DeepSeek-Coder-V2-Instruct
- THUDM/glm-4-9b-chat
- THUDM/chatglm3-6b
- 01-ai/Yi-1.5-34B-Chat-16K
- 01-ai/Yi-1.5-9B-Chat-16K
- 01-ai/Yi-1.5-6B-Chat
- internlm/internlm2_5-20b-chat
- internlm/internlm2_5-7b-chat - internlm/internlm2_5-7b-chat
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct - meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct - meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct - meta-llama/Meta-Llama-3.1-8B-Instruct
- mistralai/Mixtral-8x7B-Instruct-v0.1 - meta-llama/Meta-Llama-3-70B-Instruct
- meta-llama/Meta-Llama-3-8B-Instruct
- google/gemma-2-27b-it
- google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2 - mistralai/Mistral-7B-Instruct-v0.2
- Pro/Qwen/Qwen2-7B-Instruct
- Pro/Qwen/Qwen2-1.5B-Instruct
- Pro/THUDM/glm-4-9b-chat
- Pro/THUDM/chatglm3-6b
- Pro/01-ai/Yi-1.5-9B-Chat-16K
- Pro/01-ai/Yi-1.5-6B-Chat
- Pro/internlm/internlm2_5-7b-chat
- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
- Pro/meta-llama/Meta-Llama-3-8B-Instruct
- Pro/google/gemma-2-9b-it

View File

@ -28,3 +28,4 @@ pricing:
output: '0' output: '0'
unit: '0.000001' unit: '0.000001'
currency: RMB currency: RMB
deprecated: true

View File

@ -28,3 +28,4 @@ pricing:
output: '1.26' output: '1.26'
unit: '0.000001' unit: '0.000001'
currency: RMB currency: RMB
deprecated: true

View File

@ -1,5 +1,6 @@
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import ( from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel, OAICompatEmbeddingModel,
@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
super().validate_credentials(model, credentials) super().validate_credentials(model, credentials)
def _invoke( def _invoke(
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None self,
model: str,
credentials: dict,
texts: list[str],
user: Optional[str] = None,
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult: ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
self._add_custom_parameters(credentials) self._add_custom_parameters(credentials)
return super()._invoke(model, credentials, texts, user) return super()._invoke(model, credentials, texts, user)

View File

@ -25,6 +25,7 @@ class SparkLLMClient:
"spark-pro": {"version": "v3.1", "chat_domain": "generalv3"}, "spark-pro": {"version": "v3.1", "chat_domain": "generalv3"},
"spark-pro-128k": {"version": "pro-128k", "chat_domain": "pro-128k"}, "spark-pro-128k": {"version": "pro-128k", "chat_domain": "pro-128k"},
"spark-max": {"version": "v3.5", "chat_domain": "generalv3.5"}, "spark-max": {"version": "v3.5", "chat_domain": "generalv3.5"},
"spark-max-32k": {"version": "max-32k", "chat_domain": "max-32k"},
"spark-4.0-ultra": {"version": "v4.0", "chat_domain": "4.0Ultra"}, "spark-4.0-ultra": {"version": "v4.0", "chat_domain": "4.0Ultra"},
} }
@ -32,7 +33,7 @@ class SparkLLMClient:
self.chat_domain = model_api_configs[model]["chat_domain"] self.chat_domain = model_api_configs[model]["chat_domain"]
if model == "spark-pro-128k": if model in ["spark-pro-128k", "spark-max-32k"]:
self.api_base = f"wss://{domain}/{endpoint}/{api_version}" self.api_base = f"wss://{domain}/{endpoint}/{api_version}"
else: else:
self.api_base = f"wss://{domain}/{api_version}/{endpoint}" self.api_base = f"wss://{domain}/{api_version}/{endpoint}"

View File

@ -1,3 +1,4 @@
- spark-max-32k
- spark-4.0-ultra - spark-4.0-ultra
- spark-max - spark-max
- spark-pro-128k - spark-pro-128k

View File

@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
:param prompt_messages: prompt messages :param prompt_messages: prompt messages
:return: llm response chunk generator result :return: llm response chunk generator result
""" """
completion = ""
for index, content in enumerate(client.subscribe()): for index, content in enumerate(client.subscribe()):
if isinstance(content, dict): if isinstance(content, dict):
delta = content["data"] delta = content["data"]
else: else:
delta = content delta = content
completion += delta
assistant_prompt_message = AssistantPromptMessage( assistant_prompt_message = AssistantPromptMessage(
content=delta or "", content=delta or "",
) )
temp_assistant_prompt_message = AssistantPromptMessage(
content=completion,
)
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages) prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message]) completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
# transform usage # transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
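Tokenizing only the latest delta undercounted completion tokens on every chunk after the first; the fix accumulates the streamed text and tokenizes the running completion instead. A schematic sketch with hypothetical stream and count_tokens names:

completion = ""
for delta in stream:  # hypothetical stream of text deltas
    completion += delta
    completion_tokens = count_tokens(completion)  # not count_tokens(delta)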

View File

@ -0,0 +1,33 @@
model: spark-max-32k
label:
en_US: Spark Max-32K
model_type: llm
model_properties:
mode: chat
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
help:
zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
en_US: Kernel sampling threshold. Used to determine the randomness of the results. The higher the value, the stronger the randomness, that is, the higher the possibility of getting different answers to the same question.
- name: max_tokens
use_template: max_tokens
default: 4096
min: 1
max: 8192
help:
zh_Hans: 模型回答的tokens的最大长度。
en_US: Maximum length of tokens for the model response.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
default: 4
min: 1
max: 6
help:
zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
en_US: Randomly select one from k candidates (non-equal probability).
required: false

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: farui-plus model: farui-plus
label: label:
en_US: farui-plus en_US: farui-plus
@ -62,16 +63,11 @@ parameter_rules:
type: float type: float
default: 1.1 default: 1.1
label: label:
zh_Hans: 重复惩罚
en_US: Repetition penalty en_US: Repetition penalty
help: help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format - name: response_format
use_template: response_format use_template: response_format
pricing: pricing:

View File

@ -18,7 +18,7 @@ from dashscope.common.error import (
UnsupportedModel, UnsupportedModel,
) )
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import ( from core.model_runtime.entities.message_entities import (
AssistantPromptMessage, AssistantPromptMessage,
ImagePromptMessageContent, ImagePromptMessageContent,
@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
FetchFrom, FetchFrom,
I18nObject, I18nObject,
ModelFeature, ModelFeature,
ModelPropertyKey,
ModelType, ModelType,
ParameterRule, ParameterRule,
ParameterType, ParameterType,
@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling :param tools: tools for tool calling
:return: :return:
""" """
# Check if the model was added via get_customizable_model_schema
if self.get_customizable_model_schema(model, credentials) is not None:
# For custom models, tokens are not calculated.
return 0
if model in {"qwen-turbo-chat", "qwen-plus-chat"}: if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
model = model.replace("-chat", "") model = model.replace("-chat", "")
if model == "farui-plus": if model == "farui-plus":
@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param credentials: model credentials :param credentials: model credentials
:return: AIModelEntity or None :return: AIModelEntity or None
""" """
rules = [ return AIModelEntity(
ParameterRule(
name="temperature",
type=ParameterType.FLOAT,
use_template="temperature",
label=I18nObject(zh_Hans="温度", en_US="Temperature"),
),
ParameterRule(
name="top_p",
type=ParameterType.FLOAT,
use_template="top_p",
label=I18nObject(zh_Hans="Top P", en_US="Top P"),
),
ParameterRule(
name="top_k",
type=ParameterType.INT,
min=0,
max=99,
label=I18nObject(zh_Hans="top_k", en_US="top_k"),
),
ParameterRule(
name="max_tokens",
type=ParameterType.INT,
min=1,
max=128000,
default=1024,
label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
),
ParameterRule(
name="seed",
type=ParameterType.INT,
default=1234,
label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
),
ParameterRule(
name="repetition_penalty",
type=ParameterType.FLOAT,
default=1.1,
label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
),
]
entity = AIModelEntity(
model=model, model=model,
label=I18nObject(en_US=model), label=I18nObject(en_US=model, zh_Hans=model),
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=ModelType.LLM, model_type=ModelType.LLM,
model_properties={}, features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
parameter_rules=rules, if credentials.get("function_calling_type") == "tool_call"
else [],
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
ModelPropertyKey.MODE: LLMMode.CHAT.value,
},
parameter_rules=[
ParameterRule(
name="temperature",
use_template="temperature",
label=I18nObject(en_US="Temperature", zh_Hans="温度"),
type=ParameterType.FLOAT,
),
ParameterRule(
name="max_tokens",
use_template="max_tokens",
default=512,
min=1,
max=int(credentials.get("max_tokens", 1024)),
label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
type=ParameterType.INT,
),
ParameterRule(
name="top_p",
use_template="top_p",
label=I18nObject(en_US="Top P", zh_Hans="Top P"),
type=ParameterType.FLOAT,
),
ParameterRule(
name="top_k",
use_template="top_k",
label=I18nObject(en_US="Top K", zh_Hans="Top K"),
type=ParameterType.FLOAT,
),
ParameterRule(
name="frequency_penalty",
use_template="frequency_penalty",
label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
type=ParameterType.FLOAT,
),
],
) )
return entity
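A sketch of how the rewritten schema reacts to credentials; the model name and values are hypothetical, and the assertion assumes ModelFeature is imported as in the hunk above:

credentials = {
    "context_size": "32000",
    "max_tokens": "8192",
    "function_calling_type": "tool_call",  # enables the tool-call features
}
entity = TongyiLargeLanguageModel().get_customizable_model_schema("my-qwen-finetune", credentials)
assert ModelFeature.TOOL_CALL in entity.features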

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-0919 model: qwen-coder-turbo-0919
label: label:
en_US: qwen-coder-turbo-0919 en_US: qwen-coder-turbo-0919
@ -60,16 +61,11 @@ parameter_rules:
type: float type: float
default: 1.1 default: 1.1
label: label:
zh_Hans: 重复惩罚
en_US: Repetition penalty en_US: Repetition penalty
help: help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format - name: response_format
use_template: response_format use_template: response_format
pricing: pricing:

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-latest model: qwen-coder-turbo-latest
label: label:
en_US: qwen-coder-turbo-latest en_US: qwen-coder-turbo-latest
@ -60,16 +61,11 @@ parameter_rules:
type: float type: float
default: 1.1 default: 1.1
label: label:
zh_Hans: 重复惩罚
en_US: Repetition penalty en_US: Repetition penalty
help: help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format - name: response_format
use_template: response_format use_template: response_format
pricing: pricing:

View File

@ -1,3 +1,4 @@
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo model: qwen-coder-turbo
label: label:
en_US: qwen-coder-turbo en_US: qwen-coder-turbo
@ -60,16 +61,11 @@ parameter_rules:
type: float type: float
default: 1.1 default: 1.1
label: label:
zh_Hans: 重复惩罚
en_US: Repetition penalty en_US: Repetition penalty
help: help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- name: enable_search
type: boolean
default: false
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format - name: response_format
use_template: response_format use_template: response_format
pricing: pricing:

Some files were not shown because too many files have changed in this diff.