mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-17 06:05:51 +08:00
merge main
This commit is contained in:
commit
8fd04e5313
46
.github/workflows/web-tests.yml
vendored
Normal file
46
.github/workflows/web-tests.yml
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
name: Web Tests
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- web/**
|
||||
|
||||
concurrency:
|
||||
group: web-tests-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: Web Tests
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./web
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Check changed files
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@v45
|
||||
with:
|
||||
files: web/**
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
with:
|
||||
node-version: 20
|
||||
cache: yarn
|
||||
cache-dependency-path: ./web/package.json
|
||||
|
||||
- name: Install dependencies
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
run: yarn install --frozen-lockfile
|
||||
|
||||
- name: Run tests
|
||||
if: steps.changed-files.outputs.any_changed == 'true'
|
||||
run: yarn test
|
@ -53,11 +53,9 @@ from services.account_service import AccountService
|
||||
|
||||
warnings.simplefilter("ignore", ResourceWarning)
|
||||
|
||||
# fix windows platform
|
||||
if os.name == "nt":
|
||||
os.system('tzutil /s "UTC"')
|
||||
else:
|
||||
os.environ["TZ"] = "UTC"
|
||||
os.environ["TZ"] = "UTC"
|
||||
# windows platform not support tzset
|
||||
if hasattr(time, "tzset"):
|
||||
time.tzset()
|
||||
|
||||
|
||||
|
@ -309,7 +309,7 @@ class AppRunner:
|
||||
if not prompt_messages:
|
||||
prompt_messages = result.prompt_messages
|
||||
|
||||
if not usage and result.delta.usage:
|
||||
if result.delta.usage:
|
||||
usage = result.delta.usage
|
||||
|
||||
if not usage:
|
||||
|
@ -5,6 +5,7 @@ from typing import Optional, cast
|
||||
import numpy as np
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.entities.model_entities import ModelPropertyKey
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
|
||||
for i in range(0, len(embedding_queue_texts), max_chunks):
|
||||
batch_texts = embedding_queue_texts[i : i + max_chunks]
|
||||
|
||||
embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
|
||||
embedding_result = self._model_instance.invoke_text_embedding(
|
||||
texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
|
||||
)
|
||||
|
||||
for vector in embedding_result.embeddings:
|
||||
try:
|
||||
@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
|
||||
redis_client.expire(embedding_cache_key, 600)
|
||||
return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
|
||||
try:
|
||||
embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
|
||||
embedding_result = self._model_instance.invoke_text_embedding(
|
||||
texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
|
||||
)
|
||||
|
||||
embedding_results = embedding_result.embeddings[0]
|
||||
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
|
||||
|
10
api/core/embedding/embedding_constant.py
Normal file
10
api/core/embedding/embedding_constant.py
Normal file
@ -0,0 +1,10 @@
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class EmbeddingInputType(Enum):
|
||||
"""
|
||||
Enum for embedding input type.
|
||||
"""
|
||||
|
||||
DOCUMENT = "document"
|
||||
QUERY = "query"
|
@ -3,6 +3,7 @@ import os
|
||||
from collections.abc import Callable, Generator, Sequence
|
||||
from typing import IO, Optional, Union, cast
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
|
||||
from core.entities.provider_entities import ModelLoadBalancingConfiguration
|
||||
from core.errors.error import ProviderTokenNotInitError
|
||||
@ -158,12 +159,15 @@ class ModelInstance:
|
||||
tools=tools,
|
||||
)
|
||||
|
||||
def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
|
||||
def invoke_text_embedding(
|
||||
self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke large language model
|
||||
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
if not isinstance(self.model_type_instance, TextEmbeddingModel):
|
||||
@ -176,6 +180,7 @@ class ModelInstance:
|
||||
credentials=self.credentials,
|
||||
texts=texts,
|
||||
user=user,
|
||||
input_type=input_type,
|
||||
)
|
||||
|
||||
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
from pydantic import ConfigDict
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
|
||||
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
|
||||
from core.model_runtime.model_providers.__base.ai_model import AIModel
|
||||
@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
|
||||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
def invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke large language model
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
self.started_at = time.perf_counter()
|
||||
|
||||
try:
|
||||
return self._invoke(model, credentials, texts, user)
|
||||
return self._invoke(model, credentials, texts, user, input_type)
|
||||
except Exception as e:
|
||||
raise self._transform_invoke_error(e)
|
||||
|
||||
@abstractmethod
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke large language model
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
@ -7,6 +7,7 @@ import numpy as np
|
||||
import tiktoken
|
||||
from openai import AzureOpenAI
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
|
||||
|
||||
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
base_model_name = credentials["base_model_name"]
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = AzureOpenAI(**credentials_kwargs)
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
|
||||
api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
|
@ -13,6 +13,7 @@ from botocore.exceptions import (
|
||||
UnknownServiceError,
|
||||
)
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class BedrockTextEmbeddingModel(TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
client_config = Config(region_name=credentials["aws_region"])
|
||||
|
@ -5,6 +5,7 @@ import cohere
|
||||
import numpy as np
|
||||
from cohere.core import RequestOptions
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# get model properties
|
||||
|
@ -15,6 +15,7 @@ help:
|
||||
en_US: https://fireworks.ai/account/api-keys
|
||||
supported_model_types:
|
||||
- llm
|
||||
- text-embedding
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
|
@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Vision Instruct
|
||||
en_US: Llama 3.2 11B Vision Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.2'
|
||||
output: '0.2'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-1b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 1B Instruct
|
||||
en_US: Llama 3.2 1B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Instruct
|
||||
en_US: Llama 3.2 3B Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.1'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,46 @@
|
||||
model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Vision Instruct
|
||||
en_US: Llama 3.2 90B Vision Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
- name: context_length_exceeded_behavior
|
||||
default: None
|
||||
label:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
help:
|
||||
zh_Hans: 上下文长度超出行为
|
||||
en_US: Context Length Exceeded Behavior
|
||||
type: string
|
||||
options:
|
||||
- None
|
||||
- truncate
|
||||
- error
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
input: '0.9'
|
||||
output: '0.9'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,12 @@
|
||||
model: WhereIsAI/UAE-Large-V1
|
||||
label:
|
||||
zh_Hans: UAE-Large-V1
|
||||
en_US: UAE-Large-V1
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 512
|
||||
max_chunks: 1
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
@ -0,0 +1,12 @@
|
||||
model: thenlper/gte-base
|
||||
label:
|
||||
zh_Hans: GTE-base
|
||||
en_US: GTE-base
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 512
|
||||
max_chunks: 1
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
@ -0,0 +1,12 @@
|
||||
model: thenlper/gte-large
|
||||
label:
|
||||
zh_Hans: GTE-large
|
||||
en_US: GTE-large
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 512
|
||||
max_chunks: 1
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
@ -0,0 +1,12 @@
|
||||
model: nomic-ai/nomic-embed-text-v1.5
|
||||
label:
|
||||
zh_Hans: nomic-embed-text-v1.5
|
||||
en_US: nomic-embed-text-v1.5
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 8192
|
||||
max_chunks: 16
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
@ -0,0 +1,12 @@
|
||||
model: nomic-ai/nomic-embed-text-v1
|
||||
label:
|
||||
zh_Hans: nomic-embed-text-v1
|
||||
en_US: nomic-embed-text-v1
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 8192
|
||||
max_chunks: 16
|
||||
pricing:
|
||||
input: '0.008'
|
||||
unit: '0.000001'
|
||||
currency: 'USD'
|
@ -0,0 +1,151 @@
|
||||
import time
|
||||
from collections.abc import Mapping
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
from openai import OpenAI
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
|
||||
|
||||
|
||||
class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Fireworks text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if user:
|
||||
extra_model_kwargs["user"] = user
|
||||
|
||||
extra_model_kwargs["encoding_format"] = "float"
|
||||
|
||||
context_size = self._get_context_size(model, credentials)
|
||||
max_chunks = self._get_max_chunks(model, credentials)
|
||||
|
||||
inputs = []
|
||||
indices = []
|
||||
used_tokens = 0
|
||||
|
||||
for i, text in enumerate(texts):
|
||||
# Here token count is only an approximation based on the GPT2 tokenizer
|
||||
# TODO: Optimize for better token estimation and chunking
|
||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
if num_tokens >= context_size:
|
||||
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||
# if num tokens is larger than context length, only use the start
|
||||
inputs.append(text[0:cutoff])
|
||||
else:
|
||||
inputs.append(text)
|
||||
indices += [i]
|
||||
|
||||
batched_embeddings = []
|
||||
_iter = range(0, len(inputs), max_chunks)
|
||||
|
||||
for i in _iter:
|
||||
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||
model=model,
|
||||
client=client,
|
||||
texts=inputs[i : i + max_chunks],
|
||||
extra_model_kwargs=extra_model_kwargs,
|
||||
)
|
||||
used_tokens += embedding_used_tokens
|
||||
batched_embeddings += embeddings_batch
|
||||
|
||||
usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
|
||||
return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: Mapping) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
client = OpenAI(**credentials_kwargs)
|
||||
|
||||
# call embedding model
|
||||
self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
def _embedding_invoke(
|
||||
self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
|
||||
) -> tuple[list[list[float]], int]:
|
||||
"""
|
||||
Invoke embedding model
|
||||
:param model: model name
|
||||
:param client: model client
|
||||
:param texts: texts to embed
|
||||
:param extra_model_kwargs: extra model kwargs
|
||||
:return: embeddings and used tokens
|
||||
"""
|
||||
response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
|
||||
return [data.embedding for data in response.data], response.usage.total_tokens
|
||||
|
||||
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
input_price_info = self.get_price(
|
||||
model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
|
||||
)
|
||||
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at,
|
||||
)
|
||||
|
||||
return usage
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-flash-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-flash-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -32,6 +32,15 @@ parameter_rules:
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-flash-8b-exp-0924
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash 8B 0924
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -32,6 +32,15 @@ parameter_rules:
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -1,6 +1,6 @@
|
||||
model: gemini-1.5-flash-latest
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash
|
||||
en_US: Gemini 1.5 Flash Latest
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
@ -32,6 +32,15 @@ parameter_rules:
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-flash
|
||||
label:
|
||||
en_US: Gemini 1.5 Flash
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-pro-001
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 001
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-pro-002
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro 002
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -32,6 +32,15 @@ parameter_rules:
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -32,6 +32,15 @@ parameter_rules:
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -1,6 +1,6 @@
|
||||
model: gemini-1.5-pro-latest
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro
|
||||
en_US: Gemini 1.5 Pro Latest
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
@ -32,6 +32,15 @@ parameter_rules:
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -0,0 +1,48 @@
|
||||
model: gemini-1.5-pro
|
||||
label:
|
||||
en_US: Gemini 1.5 Pro
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 2097152
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_tokens_to_sample
|
||||
use_template: max_tokens
|
||||
required: true
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -27,6 +27,15 @@ parameter_rules:
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 4096
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -31,6 +31,15 @@ parameter_rules:
|
||||
max: 2048
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
- name: stream
|
||||
label:
|
||||
zh_Hans: 流式输出
|
||||
en_US: Stream
|
||||
type: boolean
|
||||
help:
|
||||
zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
|
||||
en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
|
||||
default: false
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
|
@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
|
||||
import google.generativeai as genai
|
||||
import requests
|
||||
from google.api_core import exceptions
|
||||
from google.generativeai import client
|
||||
from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
|
||||
from google.generativeai.client import _ClientManager
|
||||
from google.generativeai.types import ContentType, GenerateContentResponse
|
||||
from google.generativeai.types.content_types import to_part
|
||||
from PIL import Image
|
||||
|
||||
@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
|
||||
history.append(content)
|
||||
|
||||
# Create a new ClientManager with tenant's API key
|
||||
new_client_manager = client._ClientManager()
|
||||
new_client_manager = _ClientManager()
|
||||
new_client_manager.configure(api_key=credentials["google_api_key"])
|
||||
new_custom_client = new_client_manager.make_client("generative")
|
||||
|
||||
google_model._client = new_custom_client
|
||||
|
||||
safety_settings = {
|
||||
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
|
||||
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
|
||||
HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
|
||||
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
|
||||
}
|
||||
|
||||
response = google_model.generate_content(
|
||||
contents=history,
|
||||
generation_config=genai.types.GenerationConfig(**config_kwargs),
|
||||
stream=stream,
|
||||
safety_settings=safety_settings,
|
||||
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
|
||||
request_options={"timeout": 600},
|
||||
)
|
||||
|
@ -0,0 +1,25 @@
|
||||
model: llama-3.2-11b-text-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 11B Text (Preview)
|
||||
en_US: Llama 3.2 11B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,25 @@
|
||||
model: llama-3.2-1b-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 1B Text (Preview)
|
||||
en_US: Llama 3.2 1B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,25 @@
|
||||
model: llama-3.2-3b-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 3B Text (Preview)
|
||||
en_US: Llama 3.2 3B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,25 @@
|
||||
model: llama-3.2-90b-text-preview
|
||||
label:
|
||||
zh_Hans: Llama 3.2 90B Text (Preview)
|
||||
en_US: Llama 3.2 90B Text (Preview)
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 512
|
||||
min: 1
|
||||
max: 8192
|
||||
pricing:
|
||||
input: '0.05'
|
||||
output: '0.1'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -6,6 +6,7 @@ import numpy as np
|
||||
import requests
|
||||
from huggingface_hub import HfApi, InferenceClient
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
|
||||
|
||||
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
|
||||
|
||||
execute_model = model
|
||||
|
@ -1,6 +1,7 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
server_url = credentials["server_url"]
|
||||
|
@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
|
||||
from tencentcloud.common.profile.http_profile import HttpProfile
|
||||
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
|
@ -67,46 +67,3 @@ model_credential_schema:
|
||||
required: false
|
||||
type: text-input
|
||||
default: '8192'
|
||||
- variable: task
|
||||
label:
|
||||
zh_Hans: 下游任务
|
||||
en_US: Downstream task
|
||||
placeholder:
|
||||
zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。
|
||||
en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task.
|
||||
required: false
|
||||
type: select
|
||||
options:
|
||||
- value: retrieval.query
|
||||
label:
|
||||
en_US: retrieval.query
|
||||
- value: retrieval.passage
|
||||
label:
|
||||
en_US: retrieval.passage
|
||||
- value: separation
|
||||
label:
|
||||
en_US: separation
|
||||
- value: classification
|
||||
label:
|
||||
en_US: classification
|
||||
- value: text-matching
|
||||
label:
|
||||
en_US: text-matching
|
||||
- variable: dimensions
|
||||
label:
|
||||
zh_Hans: 输出维度
|
||||
en_US: Output dimensions
|
||||
placeholder:
|
||||
zh_Hans: 输入您的输出维度
|
||||
en_US: Enter output dimensions
|
||||
required: false
|
||||
type: text-input
|
||||
- variable: late_chunking
|
||||
label:
|
||||
zh_Hans: 后期分块
|
||||
en_US: Late chunking
|
||||
placeholder:
|
||||
zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。
|
||||
en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings.
|
||||
required: false
|
||||
type: switch
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -27,7 +28,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
|
||||
|
||||
api_base: str = "https://api.jina.ai/v1"
|
||||
|
||||
def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict:
|
||||
def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
|
||||
"""
|
||||
Parse model credentials
|
||||
|
||||
@ -44,23 +45,20 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
|
||||
|
||||
data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
|
||||
|
||||
task = credentials.get("task")
|
||||
dimensions = credentials.get("dimensions")
|
||||
late_chunking = credentials.get("late_chunking")
|
||||
|
||||
if task is not None:
|
||||
data["task"] = task
|
||||
|
||||
if dimensions is not None:
|
||||
data["dimensions"] = int(dimensions)
|
||||
|
||||
if late_chunking is not None:
|
||||
data["late_chunking"] = late_chunking
|
||||
# model specific parameters
|
||||
if model == "jina-embeddings-v3":
|
||||
# set `task` type according to input type for the best performance
|
||||
data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
|
||||
|
||||
return data
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -69,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
@ -81,7 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
|
||||
url = base_url + "/embeddings"
|
||||
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
|
||||
|
||||
data = self._to_payload(model=model, texts=texts, credentials=credentials)
|
||||
data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
|
||||
|
||||
try:
|
||||
response = post(url, headers=headers, data=dumps(data))
|
||||
|
@ -5,6 +5,7 @@ from typing import Optional
|
||||
from requests import post
|
||||
from yarl import URL
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
|
||||
|
||||
class LocalAITextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Jina text embedding model.
|
||||
Model class for LocalAI text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
if len(texts) != 1:
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
|
||||
api_base: str = "https://api.minimax.chat/v1/embeddings"
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["minimax_api_key"]
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -27,7 +28,12 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
|
||||
api_base: str = "https://api.mixedbread.ai/v1"
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -36,6 +42,7 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
|
@ -5,6 +5,7 @@ from typing import Optional
|
||||
from nomic import embed
|
||||
from nomic import login as nomic_login
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import (
|
||||
EmbeddingUsage,
|
||||
@ -46,6 +47,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -54,6 +56,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
embeddings, prompt_tokens, total_tokens = self.embed_text(
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
from requests import post
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
|
||||
models: list[str] = ["NV-Embed-QA"]
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
api_key = credentials["api_key"]
|
||||
|
@ -6,6 +6,7 @@ from typing import Optional
|
||||
import numpy as np
|
||||
import oci
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# get model properties
|
||||
|
@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
|
||||
|
||||
if chunk_json["done"]:
|
||||
# calculate num tokens
|
||||
if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
|
||||
# transform usage
|
||||
if "prompt_eval_count" in chunk_json:
|
||||
prompt_tokens = chunk_json["prompt_eval_count"]
|
||||
completion_tokens = chunk_json["eval_count"]
|
||||
else:
|
||||
# calculate num tokens
|
||||
prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
|
||||
completion_tokens = self._get_num_tokens_by_gpt2(full_text)
|
||||
prompt_message_content = prompt_messages[0].content
|
||||
if isinstance(prompt_message_content, str):
|
||||
prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
|
||||
else:
|
||||
content_text = ""
|
||||
for message_content in prompt_message_content:
|
||||
if message_content.type == PromptMessageContentType.TEXT:
|
||||
message_content = cast(TextPromptMessageContent, message_content)
|
||||
content_text += message_content.data
|
||||
prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
|
||||
|
||||
completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
@ -8,6 +8,7 @@ from urllib.parse import urljoin
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
|
@ -6,6 +6,7 @@ import numpy as np
|
||||
import tiktoken
|
||||
from openai import OpenAI
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# transform credentials to kwargs for model instance
|
||||
|
@ -7,6 +7,7 @@ from urllib.parse import urljoin
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
|
@ -5,6 +5,7 @@ from typing import Optional
|
||||
from requests import post
|
||||
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
from core.model_runtime.errors.invoke import (
|
||||
@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
server_url = credentials["server_url"]
|
||||
|
@ -7,6 +7,7 @@ from urllib.parse import urljoin
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import (
|
||||
AIModelEntity,
|
||||
@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
|
||||
|
@ -4,6 +4,7 @@ from typing import Optional
|
||||
|
||||
from replicate import Client as ReplicateClient
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
|
||||
|
||||
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
|
||||
|
||||
if "model_version" in credentials:
|
||||
|
@ -84,8 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
|
||||
Model class for Cohere large language model.
|
||||
"""
|
||||
|
||||
sagemaker_client: Any = None
|
||||
sagemaker_session: Any = None
|
||||
predictor: Any = None
|
||||
sagemaker_endpoint: str = None
|
||||
|
||||
def _handle_chat_generate_response(
|
||||
self,
|
||||
@ -211,7 +212,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
if not self.sagemaker_client:
|
||||
if not self.sagemaker_session:
|
||||
access_key = credentials.get("aws_access_key_id")
|
||||
secret_key = credentials.get("aws_secret_access_key")
|
||||
aws_region = credentials.get("aws_region")
|
||||
@ -226,11 +227,14 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
|
||||
else:
|
||||
boto_session = boto3.Session()
|
||||
|
||||
self.sagemaker_client = boto_session.client("sagemaker")
|
||||
sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client)
|
||||
sagemaker_client = boto_session.client("sagemaker")
|
||||
self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
|
||||
|
||||
if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
|
||||
self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
|
||||
self.predictor = Predictor(
|
||||
endpoint_name=credentials.get("sagemaker_endpoint"),
|
||||
sagemaker_session=sagemaker_session,
|
||||
endpoint_name=self.sagemaker_endpoint,
|
||||
sagemaker_session=self.sagemaker_session,
|
||||
serializer=serializers.JSONSerializer(),
|
||||
)
|
||||
|
||||
|
@ -6,6 +6,7 @@ from typing import Any, Optional
|
||||
|
||||
import boto3
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.common_entities import I18nObject
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
|
||||
@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
|
||||
return embeddings
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
# get model properties
|
||||
|
@ -1,25 +1,38 @@
|
||||
- Qwen/Qwen2.5-7B-Instruct
|
||||
- Qwen/Qwen2.5-14B-Instruct
|
||||
- Qwen/Qwen2.5-32B-Instruct
|
||||
- Qwen/Qwen2.5-72B-Instruct
|
||||
- Qwen/Qwen2.5-Math-72B-Instruct
|
||||
- Qwen/Qwen2.5-32B-Instruct
|
||||
- Qwen/Qwen2.5-14B-Instruct
|
||||
- Qwen/Qwen2.5-7B-Instruct
|
||||
- Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
- deepseek-ai/DeepSeek-V2.5
|
||||
- Qwen/Qwen2-72B-Instruct
|
||||
- Qwen/Qwen2-57B-A14B-Instruct
|
||||
- Qwen/Qwen2-7B-Instruct
|
||||
- Qwen/Qwen2-1.5B-Instruct
|
||||
- 01-ai/Yi-1.5-34B-Chat
|
||||
- 01-ai/Yi-1.5-9B-Chat-16K
|
||||
- 01-ai/Yi-1.5-6B-Chat
|
||||
- THUDM/glm-4-9b-chat
|
||||
- deepseek-ai/DeepSeek-V2.5
|
||||
- deepseek-ai/DeepSeek-V2-Chat
|
||||
- deepseek-ai/DeepSeek-Coder-V2-Instruct
|
||||
- THUDM/glm-4-9b-chat
|
||||
- THUDM/chatglm3-6b
|
||||
- 01-ai/Yi-1.5-34B-Chat-16K
|
||||
- 01-ai/Yi-1.5-9B-Chat-16K
|
||||
- 01-ai/Yi-1.5-6B-Chat
|
||||
- internlm/internlm2_5-20b-chat
|
||||
- internlm/internlm2_5-7b-chat
|
||||
- google/gemma-2-27b-it
|
||||
- google/gemma-2-9b-it
|
||||
- meta-llama/Meta-Llama-3-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3-8B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-405B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
- mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
- meta-llama/Meta-Llama-3-70B-Instruct
|
||||
- meta-llama/Meta-Llama-3-8B-Instruct
|
||||
- google/gemma-2-27b-it
|
||||
- google/gemma-2-9b-it
|
||||
- mistralai/Mistral-7B-Instruct-v0.2
|
||||
- Pro/Qwen/Qwen2-7B-Instruct
|
||||
- Pro/Qwen/Qwen2-1.5B-Instruct
|
||||
- Pro/THUDM/glm-4-9b-chat
|
||||
- Pro/THUDM/chatglm3-6b
|
||||
- Pro/01-ai/Yi-1.5-9B-Chat-16K
|
||||
- Pro/01-ai/Yi-1.5-6B-Chat
|
||||
- Pro/internlm/internlm2_5-7b-chat
|
||||
- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
- Pro/meta-llama/Meta-Llama-3-8B-Instruct
|
||||
- Pro/google/gemma-2-9b-it
|
||||
|
@ -28,3 +28,4 @@ pricing:
|
||||
output: '0'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
deprecated: true
|
||||
|
@ -28,3 +28,4 @@ pricing:
|
||||
output: '1.26'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
deprecated: true
|
||||
|
@ -1,5 +1,6 @@
|
||||
from typing import Optional
|
||||
|
||||
from core.embedding.embedding_constant import EmbeddingInputType
|
||||
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
|
||||
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
|
||||
OAICompatEmbeddingModel,
|
||||
@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
|
||||
super().validate_credentials(model, credentials)
|
||||
|
||||
def _invoke(
|
||||
self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:param input_type: input type
|
||||
:return: embeddings result
|
||||
"""
|
||||
self._add_custom_parameters(credentials)
|
||||
return super()._invoke(model, credentials, texts, user)
|
||||
|
||||
|
@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
|
||||
:param prompt_messages: prompt messages
|
||||
:return: llm response chunk generator result
|
||||
"""
|
||||
completion = ""
|
||||
for index, content in enumerate(client.subscribe()):
|
||||
if isinstance(content, dict):
|
||||
delta = content["data"]
|
||||
else:
|
||||
delta = content
|
||||
|
||||
completion += delta
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=delta or "",
|
||||
)
|
||||
|
||||
temp_assistant_prompt_message = AssistantPromptMessage(
|
||||
content=completion,
|
||||
)
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: farui-plus
|
||||
label:
|
||||
en_US: farui-plus
|
||||
|
@ -18,7 +18,7 @@ from dashscope.common.error import (
|
||||
UnsupportedModel,
|
||||
)
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
ImagePromptMessageContent,
|
||||
@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
|
||||
FetchFrom,
|
||||
I18nObject,
|
||||
ModelFeature,
|
||||
ModelPropertyKey,
|
||||
ModelType,
|
||||
ParameterRule,
|
||||
ParameterType,
|
||||
@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
|
||||
:param tools: tools for tool calling
|
||||
:return:
|
||||
"""
|
||||
# Check if the model was added via get_customizable_model_schema
|
||||
if self.get_customizable_model_schema(model, credentials) is not None:
|
||||
# For custom models, tokens are not calculated.
|
||||
return 0
|
||||
|
||||
if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
|
||||
model = model.replace("-chat", "")
|
||||
if model == "farui-plus":
|
||||
@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
|
||||
:param credentials: model credentials
|
||||
:return: AIModelEntity or None
|
||||
"""
|
||||
rules = [
|
||||
ParameterRule(
|
||||
name="temperature",
|
||||
type=ParameterType.FLOAT,
|
||||
use_template="temperature",
|
||||
label=I18nObject(zh_Hans="温度", en_US="Temperature"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="top_p",
|
||||
type=ParameterType.FLOAT,
|
||||
use_template="top_p",
|
||||
label=I18nObject(zh_Hans="Top P", en_US="Top P"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="top_k",
|
||||
type=ParameterType.INT,
|
||||
min=0,
|
||||
max=99,
|
||||
label=I18nObject(zh_Hans="top_k", en_US="top_k"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="max_tokens",
|
||||
type=ParameterType.INT,
|
||||
min=1,
|
||||
max=128000,
|
||||
default=1024,
|
||||
label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="seed",
|
||||
type=ParameterType.INT,
|
||||
default=1234,
|
||||
label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
|
||||
),
|
||||
ParameterRule(
|
||||
name="repetition_penalty",
|
||||
type=ParameterType.FLOAT,
|
||||
default=1.1,
|
||||
label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
|
||||
),
|
||||
]
|
||||
|
||||
entity = AIModelEntity(
|
||||
return AIModelEntity(
|
||||
model=model,
|
||||
label=I18nObject(en_US=model),
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
label=I18nObject(en_US=model, zh_Hans=model),
|
||||
model_type=ModelType.LLM,
|
||||
model_properties={},
|
||||
parameter_rules=rules,
|
||||
features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
|
||||
if credentials.get("function_calling_type") == "tool_call"
|
||||
else [],
|
||||
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
|
||||
model_properties={
|
||||
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
|
||||
ModelPropertyKey.MODE: LLMMode.CHAT.value,
|
||||
},
|
||||
parameter_rules=[
|
||||
ParameterRule(
|
||||
name="temperature",
|
||||
use_template="temperature",
|
||||
label=I18nObject(en_US="Temperature", zh_Hans="温度"),
|
||||
type=ParameterType.FLOAT,
|
||||
),
|
||||
ParameterRule(
|
||||
name="max_tokens",
|
||||
use_template="max_tokens",
|
||||
default=512,
|
||||
min=1,
|
||||
max=int(credentials.get("max_tokens", 1024)),
|
||||
label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
|
||||
type=ParameterType.INT,
|
||||
),
|
||||
ParameterRule(
|
||||
name="top_p",
|
||||
use_template="top_p",
|
||||
label=I18nObject(en_US="Top P", zh_Hans="Top P"),
|
||||
type=ParameterType.FLOAT,
|
||||
),
|
||||
ParameterRule(
|
||||
name="top_k",
|
||||
use_template="top_k",
|
||||
label=I18nObject(en_US="Top K", zh_Hans="Top K"),
|
||||
type=ParameterType.FLOAT,
|
||||
),
|
||||
ParameterRule(
|
||||
name="frequency_penalty",
|
||||
use_template="frequency_penalty",
|
||||
label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
|
||||
type=ParameterType.FLOAT,
|
||||
),
|
||||
],
|
||||
)
|
||||
|
||||
return entity
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-coder-turbo-0919
|
||||
label:
|
||||
en_US: qwen-coder-turbo-0919
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-coder-turbo-latest
|
||||
label:
|
||||
en_US: qwen-coder-turbo-latest
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-coder-turbo
|
||||
label:
|
||||
en_US: qwen-coder-turbo
|
||||
|
@ -1,4 +1,4 @@
|
||||
# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-long
|
||||
label:
|
||||
en_US: qwen-long
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-plus-0816
|
||||
label:
|
||||
en_US: qwen-math-plus-0816
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-plus-0919
|
||||
label:
|
||||
en_US: qwen-math-plus-0919
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-plus-latest
|
||||
label:
|
||||
en_US: qwen-math-plus-latest
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-plus
|
||||
label:
|
||||
en_US: qwen-math-plus
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-turbo-0919
|
||||
label:
|
||||
en_US: qwen-math-turbo-0919
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-turbo-latest
|
||||
label:
|
||||
en_US: qwen-math-turbo-latest
|
||||
|
@ -1,3 +1,4 @@
|
||||
# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
|
||||
model: qwen-math-turbo
|
||||
label:
|
||||
en_US: qwen-math-turbo
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-0107
|
||||
label:
|
||||
en_US: qwen-max-0107
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max-0403, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-0403
|
||||
label:
|
||||
en_US: qwen-max-0403
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max-0428, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-0428
|
||||
label:
|
||||
en_US: qwen-max-0428
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max-0919, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-0919
|
||||
label:
|
||||
en_US: qwen-max-0919
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-1201
|
||||
label:
|
||||
en_US: qwen-max-1201
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-latest
|
||||
label:
|
||||
en_US: qwen-max-latest
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max-longcontext
|
||||
label:
|
||||
en_US: qwen-max-longcontext
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-max, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
|
||||
model: qwen-max
|
||||
label:
|
||||
en_US: qwen-max
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus-0206, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-0206
|
||||
label:
|
||||
en_US: qwen-plus-0206
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus-0624, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-0624
|
||||
label:
|
||||
en_US: qwen-plus-0624
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus-0723, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-0723
|
||||
label:
|
||||
en_US: qwen-plus-0723
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus-0806, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-0806
|
||||
label:
|
||||
en_US: qwen-plus-0806
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus-0919, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-0919
|
||||
label:
|
||||
en_US: qwen-plus-0919
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-chat
|
||||
label:
|
||||
en_US: qwen-plus-chat
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus-latest, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus-latest
|
||||
label:
|
||||
en_US: qwen-plus-latest
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-plus, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
|
||||
model: qwen-plus
|
||||
label:
|
||||
en_US: qwen-plus
|
||||
|
@ -1,3 +1,6 @@
|
||||
# this model corresponds to qwen-turbo-0206, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
|
||||
|
||||
model: qwen-turbo-0206
|
||||
label:
|
||||
en_US: qwen-turbo-0206
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-turbo-0624, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
|
||||
model: qwen-turbo-0624
|
||||
label:
|
||||
en_US: qwen-turbo-0624
|
||||
|
@ -1,3 +1,5 @@
|
||||
# this model corresponds to qwen-turbo-0919, for more details
|
||||
# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
|
||||
model: qwen-turbo-0919
|
||||
label:
|
||||
en_US: qwen-turbo-0919
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user