diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml new file mode 100644 index 0000000000..5aee64b8e6 --- /dev/null +++ b/.github/workflows/web-tests.yml @@ -0,0 +1,46 @@ +name: Web Tests + +on: + pull_request: + branches: + - main + paths: + - web/** + +concurrency: + group: web-tests-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + test: + name: Web Tests + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./web + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Check changed files + id: changed-files + uses: tj-actions/changed-files@v45 + with: + files: web/** + + - name: Setup Node.js + uses: actions/setup-node@v4 + if: steps.changed-files.outputs.any_changed == 'true' + with: + node-version: 20 + cache: yarn + cache-dependency-path: ./web/package.json + + - name: Install dependencies + if: steps.changed-files.outputs.any_changed == 'true' + run: yarn install --frozen-lockfile + + - name: Run tests + if: steps.changed-files.outputs.any_changed == 'true' + run: yarn test diff --git a/api/app.py b/api/app.py index 91a49337fc..1b58beee15 100644 --- a/api/app.py +++ b/api/app.py @@ -53,11 +53,9 @@ from services.account_service import AccountService warnings.simplefilter("ignore", ResourceWarning) -# fix windows platform -if os.name == "nt": - os.system('tzutil /s "UTC"') -else: - os.environ["TZ"] = "UTC" +os.environ["TZ"] = "UTC" +# windows platform not support tzset +if hasattr(time, "tzset"): time.tzset() diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py index 1b412b8639..203aca3384 100644 --- a/api/core/app/apps/base_app_runner.py +++ b/api/core/app/apps/base_app_runner.py @@ -309,7 +309,7 @@ class AppRunner: if not prompt_messages: prompt_messages = result.prompt_messages - if not usage and result.delta.usage: + if result.delta.usage: usage = result.delta.usage if not usage: diff --git a/api/core/embedding/cached_embedding.py 
b/api/core/embedding/cached_embedding.py index 8ce12fd59f..75219051cd 100644 --- a/api/core/embedding/cached_embedding.py +++ b/api/core/embedding/cached_embedding.py @@ -5,6 +5,7 @@ from typing import Optional, cast import numpy as np from sqlalchemy.exc import IntegrityError +from core.embedding.embedding_constant import EmbeddingInputType from core.model_manager import ModelInstance from core.model_runtime.entities.model_entities import ModelPropertyKey from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel @@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings): for i in range(0, len(embedding_queue_texts), max_chunks): batch_texts = embedding_queue_texts[i : i + max_chunks] - embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user) + embedding_result = self._model_instance.invoke_text_embedding( + texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT + ) for vector in embedding_result.embeddings: try: @@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings): redis_client.expire(embedding_cache_key, 600) return list(np.frombuffer(base64.b64decode(embedding), dtype="float")) try: - embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user) + embedding_result = self._model_instance.invoke_text_embedding( + texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY + ) embedding_results = embedding_result.embeddings[0] embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist() diff --git a/api/core/embedding/embedding_constant.py b/api/core/embedding/embedding_constant.py new file mode 100644 index 0000000000..9b4934646b --- /dev/null +++ b/api/core/embedding/embedding_constant.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EmbeddingInputType(Enum): + """ + Enum for embedding input type. 
+ """ + + DOCUMENT = "document" + QUERY = "query" diff --git a/api/core/model_manager.py b/api/core/model_manager.py index 990efd36c6..74b4452362 100644 --- a/api/core/model_manager.py +++ b/api/core/model_manager.py @@ -3,6 +3,7 @@ import os from collections.abc import Callable, Generator, Sequence from typing import IO, Optional, Union, cast +from core.embedding.embedding_constant import EmbeddingInputType from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_entities import ModelLoadBalancingConfiguration from core.errors.error import ProviderTokenNotInitError @@ -158,12 +159,15 @@ class ModelInstance: tools=tools, ) - def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult: + def invoke_text_embedding( + self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT + ) -> TextEmbeddingResult: """ Invoke large language model :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ if not isinstance(self.model_type_instance, TextEmbeddingModel): @@ -176,6 +180,7 @@ class ModelInstance: credentials=self.credentials, texts=texts, user=user, + input_type=input_type, ) def get_text_embedding_num_tokens(self, texts: list[str]) -> int: diff --git a/api/core/model_runtime/model_providers/__base/text_embedding_model.py b/api/core/model_runtime/model_providers/__base/text_embedding_model.py index 54a4486023..a948dca20d 100644 --- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py +++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py @@ -4,6 +4,7 @@ from typing import Optional from pydantic import ConfigDict +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType from core.model_runtime.entities.text_embedding_entities 
import TextEmbeddingResult from core.model_runtime.model_providers.__base.ai_model import AIModel @@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel): model_config = ConfigDict(protected_namespaces=()) def invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ - Invoke large language model + Invoke text embedding model :param model: model name :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ self.started_at = time.perf_counter() try: - return self._invoke(model, credentials, texts, user) + return self._invoke(model, credentials, texts, user, input_type) except Exception as e: raise self._transform_invoke_error(e) @abstractmethod def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ - Invoke large language model + Invoke text embedding model :param model: model name :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ raise NotImplementedError diff --git a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py index d9cff8ecbb..8701a38050 100644 --- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py @@ -7,6 +7,7 @@ import numpy as np import tiktoken from openai import 
AzureOpenAI +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import AIModelEntity, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.validate import CredentialsValidateFailedError @@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel): def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ base_model_name = credentials["base_model_name"] credentials_kwargs = self._to_credential_kwargs(credentials) client = AzureOpenAI(**credentials_kwargs) diff --git a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py index 779dfbb608..56b9be1c36 100644 --- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import Optional from requests import post +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel): api_base: 
str = "http://api.baichuan-ai.com/v1/embeddings" def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ api_key = credentials["api_key"] diff --git a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py index 251170d1ae..d9c5726592 100644 --- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py @@ -13,6 +13,7 @@ from botocore.exceptions import ( UnknownServiceError, ) +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -30,7 +31,12 @@ logger = logging.getLogger(__name__) class BedrockTextEmbeddingModel(TextEmbeddingModel): def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings 
result """ client_config = Config(region_name=credentials["aws_region"]) diff --git a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py index a1c5e98118..4da2080690 100644 --- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py @@ -5,6 +5,7 @@ import cohere import numpy as np from cohere.core import RequestOptions +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ # get model properties diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml index f886fa23b5..cdb87a55e9 100644 --- a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml +++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml @@ -15,6 +15,7 @@ help: en_US: https://fireworks.ai/account/api-keys supported_model_types: - llm + - text-embedding configurate_methods: - predefined-model provider_credential_schema: diff --git 
a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml new file mode 100644 index 0000000000..31415a24fa --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct +label: + zh_Hans: Llama 3.2 11B Vision Instruct + en_US: Llama 3.2 11B Vision Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml new file mode 100644 index 0000000000..c2fd77d256 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-1b-instruct +label: + zh_Hans: Llama 3.2 1B Instruct + en_US: Llama 3.2 1B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: 
temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.1' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml new file mode 100644 index 0000000000..4b3c459c7b --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-3b-instruct +label: + zh_Hans: Llama 3.2 3B Instruct + en_US: Llama 3.2 3B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.1' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml new file mode 100644 index 0000000000..0aece7455d --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct +label: + zh_Hans: Llama 3.2 90B Vision Instruct + en_US: Llama 3.2 90B Vision Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml new file mode 100644 index 0000000000..d7c11691cf --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml @@ -0,0 +1,12 @@ +model: WhereIsAI/UAE-Large-V1 +label: + zh_Hans: UAE-Large-V1 + en_US: UAE-Large-V1 +model_type: text-embedding +model_properties: + context_size: 512 + max_chunks: 1 +pricing: + input: '0.008' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml new file mode 100644 index 0000000000..d09bafb4d3 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml @@ -0,0 +1,12 @@ +model: thenlper/gte-base +label: + zh_Hans: GTE-base + en_US: GTE-base +model_type: text-embedding +model_properties: + context_size: 512 + max_chunks: 1 +pricing: + input: '0.008' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml new file mode 100644 index 
0000000000..c41fa2f9d3 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml @@ -0,0 +1,12 @@ +model: thenlper/gte-large +label: + zh_Hans: GTE-large + en_US: GTE-large +model_type: text-embedding +model_properties: + context_size: 512 + max_chunks: 1 +pricing: + input: '0.008' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml new file mode 100644 index 0000000000..c9098503d9 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml @@ -0,0 +1,12 @@ +model: nomic-ai/nomic-embed-text-v1.5 +label: + zh_Hans: nomic-embed-text-v1.5 + en_US: nomic-embed-text-v1.5 +model_type: text-embedding +model_properties: + context_size: 8192 + max_chunks: 16 +pricing: + input: '0.008' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml new file mode 100644 index 0000000000..89078d3ff6 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml @@ -0,0 +1,12 @@ +model: nomic-ai/nomic-embed-text-v1 +label: + zh_Hans: nomic-embed-text-v1 + en_US: nomic-embed-text-v1 +model_type: text-embedding +model_properties: + context_size: 8192 + max_chunks: 16 +pricing: + input: '0.008' + unit: '0.000001' + currency: 'USD' diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py new file mode 100644 index 0000000000..cdce69ff38 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py @@ -0,0 +1,151 @@ +import time +from 
collections.abc import Mapping +from typing import Optional, Union + +import numpy as np +from openai import OpenAI + +from core.embedding.embedding_constant import EmbeddingInputType +from core.model_runtime.entities.model_entities import PriceType +from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel +from core.model_runtime.model_providers.fireworks._common import _CommonFireworks + + +class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel): + """ + Model class for Fireworks text embedding model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, + ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ + + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + extra_model_kwargs = {} + if user: + extra_model_kwargs["user"] = user + + extra_model_kwargs["encoding_format"] = "float" + + context_size = self._get_context_size(model, credentials) + max_chunks = self._get_max_chunks(model, credentials) + + inputs = [] + indices = [] + used_tokens = 0 + + for i, text in enumerate(texts): + # Here token count is only an approximation based on the GPT2 tokenizer + # TODO: Optimize for better token estimation and chunking + num_tokens = self._get_num_tokens_by_gpt2(text) + + if num_tokens >= context_size: + cutoff = int(np.floor(len(text) * (context_size / num_tokens))) + # if num tokens is larger than context length, only use the start + 
inputs.append(text[0:cutoff]) + else: + inputs.append(text) + indices += [i] + + batched_embeddings = [] + _iter = range(0, len(inputs), max_chunks) + + for i in _iter: + embeddings_batch, embedding_used_tokens = self._embedding_invoke( + model=model, + client=client, + texts=inputs[i : i + max_chunks], + extra_model_kwargs=extra_model_kwargs, + ) + used_tokens += embedding_used_tokens + batched_embeddings += embeddings_batch + + usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens) + return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model) + + def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :return: + """ + return sum(self._get_num_tokens_by_gpt2(text) for text in texts) + + def validate_credentials(self, model: str, credentials: Mapping) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + # transform credentials to kwargs for model instance + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + # call embedding model + self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={}) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + def _embedding_invoke( + self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict + ) -> tuple[list[list[float]], int]: + """ + Invoke embedding model + :param model: model name + :param client: model client + :param texts: texts to embed + :param extra_model_kwargs: extra model kwargs + :return: embeddings and used tokens + """ + response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs) + return [data.embedding for data in response.data], 
response.usage.total_tokens + + def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage: + """ + Calculate response usage + + :param model: model name + :param credentials: model credentials + :param tokens: input tokens + :return: usage + """ + input_price_info = self.get_price( + model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT + ) + + usage = EmbeddingUsage( + tokens=tokens, + total_tokens=tokens, + unit_price=input_price_info.unit_price, + price_unit=input_price_info.unit, + total_price=input_price_info.total_amount, + currency=input_price_info.currency, + latency=time.perf_counter() - self.started_at, + ) + + return usage diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml new file mode 100644 index 0000000000..d84e9937e0 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash-001 +label: + en_US: Gemini 1.5 Flash 001 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml new file mode 100644 index 0000000000..2ff70564b2 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash-002 +label: + en_US: Gemini 1.5 Flash 002 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. 
+ default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml index bbc697e934..4e0209890a 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml new file mode 100644 index 0000000000..2aea8149f4 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash-8b-exp-0924 +label: + en_US: Gemini 1.5 Flash 8B 0924 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml index c5695e5dda..faabc5e4d1 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. 
+ default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml index 24b1c5af8a..a22fcca941 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml @@ -1,6 +1,6 @@ model: gemini-1.5-flash-latest label: - en_US: Gemini 1.5 Flash + en_US: Gemini 1.5 Flash Latest model_type: llm features: - agent-thought @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml new file mode 100644 index 0000000000..dfd55c3a94 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-flash +label: + en_US: Gemini 1.5 Flash +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml new file mode 100644 index 0000000000..a1feff171d --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-pro-001 +label: + en_US: Gemini 1.5 Pro 001 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. 
+ default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml new file mode 100644 index 0000000000..9ae07a06c5 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-pro-002 +label: + en_US: Gemini 1.5 Pro 002 +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. 
+ default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml index 0a918e0d7b..97c68f7a18 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml index 7452ce46e7..860e4816a1 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. 
+ default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml index b3e1ecf3af..d1bf7d269d 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml @@ -1,6 +1,6 @@ model: gemini-1.5-pro-latest label: - en_US: Gemini 1.5 Pro + en_US: Gemini 1.5 Pro Latest model_type: llm features: - agent-thought @@ -32,6 +32,15 @@ parameter_rules: max: 8192 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml new file mode 100644 index 0000000000..bdd70b34a2 --- /dev/null +++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml @@ -0,0 +1,48 @@ +model: gemini-1.5-pro +label: + en_US: Gemini 1.5 Pro +model_type: llm +features: + - agent-thought + - vision + - tool-call + - stream-tool-call +model_properties: + mode: chat + context_size: 2097152 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: max_tokens_to_sample + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 + - name: response_format + use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml index 075e484e46..2d213d56ad 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml @@ -27,6 +27,15 @@ parameter_rules: default: 4096 min: 1 max: 4096 + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. 
+ default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml index 4e9f59e7da..e2f487c1ee 100644 --- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml +++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml @@ -31,6 +31,15 @@ parameter_rules: max: 2048 - name: response_format use_template: response_format + - name: stream + label: + zh_Hans: 流式输出 + en_US: Stream + type: boolean + help: + zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。 + en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once. + default: false pricing: input: '0.00' output: '0.00' diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py index 3fc6787a44..e686ad08d9 100644 --- a/api/core/model_runtime/model_providers/google/llm/llm.py +++ b/api/core/model_runtime/model_providers/google/llm/llm.py @@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm import google.generativeai as genai import requests from google.api_core import exceptions -from google.generativeai import client -from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory +from google.generativeai.client import _ClientManager +from google.generativeai.types import ContentType, GenerateContentResponse from google.generativeai.types.content_types import to_part from PIL import Image @@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel): history.append(content) # Create a new ClientManager with tenant's API key - new_client_manager = client._ClientManager() + new_client_manager = _ClientManager() new_client_manager.configure(api_key=credentials["google_api_key"]) new_custom_client = new_client_manager.make_client("generative") 
google_model._client = new_custom_client - safety_settings = { - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - } - response = google_model.generate_content( contents=history, generation_config=genai.types.GenerationConfig(**config_kwargs), stream=stream, - safety_settings=safety_settings, tools=self._convert_tools_to_glm_tool(tools) if tools else None, request_options={"timeout": 600}, ) diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml new file mode 100644 index 0000000000..019d453723 --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-11b-text-preview +label: + zh_Hans: Llama 3.2 11B Text (Preview) + en_US: Llama 3.2 11B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml new file mode 100644 index 0000000000..a44e4ff508 --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-1b-preview +label: + zh_Hans: Llama 3.2 1B Text (Preview) + en_US: Llama 3.2 1B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + 
context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml new file mode 100644 index 0000000000..f2fdd0a05e --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-3b-preview +label: + zh_Hans: Llama 3.2 3B Text (Preview) + en_US: Llama 3.2 3B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml new file mode 100644 index 0000000000..3b34e7c079 --- /dev/null +++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml @@ -0,0 +1,25 @@ +model: llama-3.2-90b-text-preview +label: + zh_Hans: Llama 3.2 90B Text (Preview) + en_US: Llama 3.2 90B Text (Preview) +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.1' + unit: '0.000001' + currency: USD diff --git 
a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py index 4ad96c4233..b2e6d1b652 100644 --- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py @@ -6,6 +6,7 @@ import numpy as np import requests from huggingface_hub import HfApi, InferenceClient +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/ class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel): def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ client = InferenceClient(token=credentials["huggingfacehub_api_token"]) execute_model = model diff --git a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py index 55f3c25804..b8ff3ca549 100644 --- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py +++ 
b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py @@ -1,6 +1,7 @@ import time from typing import Optional +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ server_url = credentials["server_url"] diff --git a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py index 1396e59e18..75701ebc54 100644 --- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py @@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.hunyuan.v20230901 import hunyuan_client, models +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult 
from core.model_runtime.errors.invoke import ( @@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ diff --git a/api/core/model_runtime/model_providers/jina/jina.yaml b/api/core/model_runtime/model_providers/jina/jina.yaml index 4ff6ba0f22..970b22965b 100644 --- a/api/core/model_runtime/model_providers/jina/jina.yaml +++ b/api/core/model_runtime/model_providers/jina/jina.yaml @@ -67,46 +67,3 @@ model_credential_schema: required: false type: text-input default: '8192' - - variable: task - label: - zh_Hans: 下游任务 - en_US: Downstream task - placeholder: - zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。 - en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task. 
- required: false - type: select - options: - - value: retrieval.query - label: - en_US: retrieval.query - - value: retrieval.passage - label: - en_US: retrieval.passage - - value: separation - label: - en_US: separation - - value: classification - label: - en_US: classification - - value: text-matching - label: - en_US: text-matching - - variable: dimensions - label: - zh_Hans: 输出维度 - en_US: Output dimensions - placeholder: - zh_Hans: 输入您的输出维度 - en_US: Enter output dimensions - required: false - type: text-input - - variable: late_chunking - label: - zh_Hans: 后期分块 - en_US: Late chunking - placeholder: - zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。 - en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings. - required: false - type: switch diff --git a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py index 6c96699ea2..b397129512 100644 --- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import Optional from requests import post +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -27,7 +28,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel): api_base: str = "https://api.jina.ai/v1" - def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict: + def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict: """ Parse model credentials @@ -44,23 +45,20 @@ class 
JinaTextEmbeddingModel(TextEmbeddingModel): data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]} - task = credentials.get("task") - dimensions = credentials.get("dimensions") - late_chunking = credentials.get("late_chunking") - - if task is not None: - data["task"] = task - - if dimensions is not None: - data["dimensions"] = int(dimensions) - - if late_chunking is not None: - data["late_chunking"] = late_chunking + # model specific parameters + if model == "jina-embeddings-v3": + # set `task` type according to input type for the best performance + data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage" return data def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -69,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ api_key = credentials["api_key"] @@ -81,7 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel): url = base_url + "/embeddings" headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"} - data = self._to_payload(model=model, texts=texts, credentials=credentials) + data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type) try: response = post(url, headers=headers, data=dumps(data)) diff --git a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py index 7d258be81e..ab8ca76c2f 100644 --- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py +++ 
b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py @@ -5,6 +5,7 @@ from typing import Optional from requests import post from yarl import URL +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE class LocalAITextEmbeddingModel(TextEmbeddingModel): """ - Model class for Jina text embedding model. + Model class for LocalAI text embedding model. """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ if len(texts) != 1: diff --git a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py index 76fd1342bd..74d2a221d1 100644 --- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import Optional from requests import post +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import 
EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel): api_base: str = "https://api.minimax.chat/v1/embeddings" def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ api_key = credentials["minimax_api_key"] diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py index 05d9a9a0c6..68b7b448bf 100644 --- a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import Optional import requests +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -27,7 +28,12 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel): api_base: str = "https://api.mixedbread.ai/v1" def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> 
TextEmbeddingResult: """ Invoke text embedding model @@ -36,6 +42,7 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ api_key = credentials["api_key"] diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py index ccbfd196a9..857dfb5f41 100644 --- a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py @@ -5,6 +5,7 @@ from typing import Optional from nomic import embed from nomic import login as nomic_login +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import ( EmbeddingUsage, @@ -46,6 +47,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel): credentials: dict, texts: list[str], user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -54,6 +56,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ embeddings, prompt_tokens, total_tokens = self.embed_text( diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py index 00cec265d5..936ceb8dd2 100644 --- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import 
Optional from requests import post +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel): models: list[str] = ["NV-Embed-QA"] def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ api_key = credentials["api_key"] diff --git a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py index 80ad2be9f5..4de9296cca 100644 --- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py @@ -6,6 +6,7 @@ from typing import Optional import numpy as np import oci +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: 
EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ # get model properties diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py index ff732e6925..a7ea53e0e9 100644 --- a/api/core/model_runtime/model_providers/ollama/llm/llm.py +++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py @@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel): if chunk_json["done"]: # calculate num tokens - if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json: - # transform usage + if "prompt_eval_count" in chunk_json: prompt_tokens = chunk_json["prompt_eval_count"] - completion_tokens = chunk_json["eval_count"] else: - # calculate num tokens - prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content) - completion_tokens = self._get_num_tokens_by_gpt2(full_text) + prompt_message_content = prompt_messages[0].content + if isinstance(prompt_message_content, str): + prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content) + else: + content_text = "" + for message_content in prompt_message_content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + content_text += message_content.data + prompt_tokens = self._get_num_tokens_by_gpt2(content_text) + + completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text)) # transform usage usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) diff --git a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py 
b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py index b4c61d8a6d..5cf3f1c6fa 100644 --- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py @@ -8,6 +8,7 @@ from urllib.parse import urljoin import numpy as np import requests +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import ( AIModelEntity, @@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ diff --git a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py index 535d8388bc..16f1a0cfa1 100644 --- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py @@ -6,6 +6,7 @@ import numpy as np import tiktoken from openai import OpenAI +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.validate import CredentialsValidateFailedError @@ -19,7 +20,12 @@ class 
OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ # transform credentials to kwargs for model instance diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py index e83cfdf873..64fa6aaa3c 100644 --- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py @@ -7,6 +7,7 @@ from urllib.parse import urljoin import numpy as np import requests +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import ( AIModelEntity, @@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + 
:param input_type: input type :return: embeddings result """ diff --git a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py index 00e583cc79..c5d4330912 100644 --- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py @@ -5,6 +5,7 @@ from typing import Optional from requests import post from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import ( @@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ server_url = credentials["server_url"] diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py index b62a2d2aaf..1e86f351c8 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py @@ -7,6 +7,7 @@ from urllib.parse import urljoin import numpy as np import 
requests +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import ( AIModelEntity, @@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ diff --git a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py index 71b6fb99c4..9f724a77ac 100644 --- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import Optional from replicate import Client as ReplicateClient +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel): def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + 
texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30) if "model_version" in credentials: diff --git a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py index 04789197ee..97b7692044 100644 --- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py +++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py @@ -84,8 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel): Model class for Cohere large language model. """ - sagemaker_client: Any = None + sagemaker_session: Any = None predictor: Any = None + sagemaker_endpoint: str = None def _handle_chat_generate_response( self, @@ -211,7 +212,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel): :param user: unique user id :return: full response or stream response chunk generator result """ - if not self.sagemaker_client: + if not self.sagemaker_session: access_key = credentials.get("aws_access_key_id") secret_key = credentials.get("aws_secret_access_key") aws_region = credentials.get("aws_region") @@ -226,11 +227,14 @@ class SageMakerLargeLanguageModel(LargeLanguageModel): else: boto_session = boto3.Session() - self.sagemaker_client = boto_session.client("sagemaker") - sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client) + sagemaker_client = boto_session.client("sagemaker") + self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client) + + if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"): + self.sagemaker_endpoint = 
credentials.get("sagemaker_endpoint") self.predictor = Predictor( - endpoint_name=credentials.get("sagemaker_endpoint"), - sagemaker_session=sagemaker_session, + endpoint_name=self.sagemaker_endpoint, + sagemaker_session=self.sagemaker_session, serializer=serializers.JSONSerializer(), ) diff --git a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py index d55144f8a7..8f993ce672 100644 --- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py @@ -6,6 +6,7 @@ from typing import Any, Optional import boto3 +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel): return embeddings def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ # get model properties diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml index 43db4aed11..a3e5d0981f 100644 --- 
a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml @@ -1,25 +1,38 @@ -- Qwen/Qwen2.5-7B-Instruct -- Qwen/Qwen2.5-14B-Instruct -- Qwen/Qwen2.5-32B-Instruct - Qwen/Qwen2.5-72B-Instruct +- Qwen/Qwen2.5-Math-72B-Instruct +- Qwen/Qwen2.5-32B-Instruct +- Qwen/Qwen2.5-14B-Instruct +- Qwen/Qwen2.5-7B-Instruct +- Qwen/Qwen2.5-Coder-7B-Instruct +- deepseek-ai/DeepSeek-V2.5 - Qwen/Qwen2-72B-Instruct - Qwen/Qwen2-57B-A14B-Instruct - Qwen/Qwen2-7B-Instruct - Qwen/Qwen2-1.5B-Instruct -- 01-ai/Yi-1.5-34B-Chat -- 01-ai/Yi-1.5-9B-Chat-16K -- 01-ai/Yi-1.5-6B-Chat -- THUDM/glm-4-9b-chat -- deepseek-ai/DeepSeek-V2.5 - deepseek-ai/DeepSeek-V2-Chat - deepseek-ai/DeepSeek-Coder-V2-Instruct +- THUDM/glm-4-9b-chat +- THUDM/chatglm3-6b +- 01-ai/Yi-1.5-34B-Chat-16K +- 01-ai/Yi-1.5-9B-Chat-16K +- 01-ai/Yi-1.5-6B-Chat +- internlm/internlm2_5-20b-chat - internlm/internlm2_5-7b-chat -- google/gemma-2-27b-it -- google/gemma-2-9b-it -- meta-llama/Meta-Llama-3-70B-Instruct -- meta-llama/Meta-Llama-3-8B-Instruct - meta-llama/Meta-Llama-3.1-405B-Instruct - meta-llama/Meta-Llama-3.1-70B-Instruct - meta-llama/Meta-Llama-3.1-8B-Instruct -- mistralai/Mixtral-8x7B-Instruct-v0.1 +- meta-llama/Meta-Llama-3-70B-Instruct +- meta-llama/Meta-Llama-3-8B-Instruct +- google/gemma-2-27b-it +- google/gemma-2-9b-it - mistralai/Mistral-7B-Instruct-v0.2 +- Pro/Qwen/Qwen2-7B-Instruct +- Pro/Qwen/Qwen2-1.5B-Instruct +- Pro/THUDM/glm-4-9b-chat +- Pro/THUDM/chatglm3-6b +- Pro/01-ai/Yi-1.5-9B-Chat-16K +- Pro/01-ai/Yi-1.5-6B-Chat +- Pro/internlm/internlm2_5-7b-chat +- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct +- Pro/meta-llama/Meta-Llama-3-8B-Instruct +- Pro/google/gemma-2-9b-it diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml index 27664eab6c..89fb153ba0 100644 --- 
a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml @@ -28,3 +28,4 @@ pricing: output: '0' unit: '0.000001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml index fd7aada428..2785e7496f 100644 --- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml @@ -28,3 +28,4 @@ pricing: output: '1.26' unit: '0.000001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py index 6cdf4933b4..c5dcc12610 100644 --- a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py @@ -1,5 +1,6 @@ from typing import Optional +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import ( OAICompatEmbeddingModel, @@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel): super().validate_credentials(model, credentials) def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: + """ + Invoke text embedding model + + :param model: model name + :param credentials: model 
credentials + :param texts: texts to embed + :param user: unique user id + :param input_type: input type + :return: embeddings result + """ self._add_custom_parameters(credentials) return super()._invoke(model, credentials, texts, user) diff --git a/api/core/model_runtime/model_providers/spark/llm/llm.py b/api/core/model_runtime/model_providers/spark/llm/llm.py index 57193dc031..1181ba699a 100644 --- a/api/core/model_runtime/model_providers/spark/llm/llm.py +++ b/api/core/model_runtime/model_providers/spark/llm/llm.py @@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel): :param prompt_messages: prompt messages :return: llm response chunk generator result """ + completion = "" for index, content in enumerate(client.subscribe()): if isinstance(content, dict): delta = content["data"] else: delta = content - + completion += delta assistant_prompt_message = AssistantPromptMessage( content=delta or "", ) - + temp_assistant_prompt_message = AssistantPromptMessage( + content=completion, + ) prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages) - completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message]) + completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message]) # transform usage usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml index d0ff443827..34a57d1fc0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: farui-plus label: en_US: farui-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py 
index f90c7f075f..3e3585b30a 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py +++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py @@ -18,7 +18,7 @@ from dashscope.common.error import ( UnsupportedModel, ) -from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, ImagePromptMessageContent, @@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import ( FetchFrom, I18nObject, ModelFeature, + ModelPropertyKey, ModelType, ParameterRule, ParameterType, @@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel): :param tools: tools for tool calling :return: """ + # Check if the model was added via get_customizable_model_schema + if self.get_customizable_model_schema(model, credentials) is not None: + # For custom models, tokens are not calculated. 
+ return 0 + if model in {"qwen-turbo-chat", "qwen-plus-chat"}: model = model.replace("-chat", "") if model == "farui-plus": @@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel): :param credentials: model credentials :return: AIModelEntity or None """ - rules = [ - ParameterRule( - name="temperature", - type=ParameterType.FLOAT, - use_template="temperature", - label=I18nObject(zh_Hans="温度", en_US="Temperature"), - ), - ParameterRule( - name="top_p", - type=ParameterType.FLOAT, - use_template="top_p", - label=I18nObject(zh_Hans="Top P", en_US="Top P"), - ), - ParameterRule( - name="top_k", - type=ParameterType.INT, - min=0, - max=99, - label=I18nObject(zh_Hans="top_k", en_US="top_k"), - ), - ParameterRule( - name="max_tokens", - type=ParameterType.INT, - min=1, - max=128000, - default=1024, - label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"), - ), - ParameterRule( - name="seed", - type=ParameterType.INT, - default=1234, - label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"), - ), - ParameterRule( - name="repetition_penalty", - type=ParameterType.FLOAT, - default=1.1, - label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"), - ), - ] - - entity = AIModelEntity( + return AIModelEntity( model=model, - label=I18nObject(en_US=model), - fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + label=I18nObject(en_US=model, zh_Hans=model), model_type=ModelType.LLM, - model_properties={}, - parameter_rules=rules, + features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL] + if credentials.get("function_calling_type") == "tool_call" + else [], + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)), + ModelPropertyKey.MODE: LLMMode.CHAT.value, + }, + parameter_rules=[ + ParameterRule( + name="temperature", + use_template="temperature", + label=I18nObject(en_US="Temperature", zh_Hans="温度"), + type=ParameterType.FLOAT, + ), + ParameterRule( 
+ name="max_tokens", + use_template="max_tokens", + default=512, + min=1, + max=int(credentials.get("max_tokens", 1024)), + label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"), + type=ParameterType.INT, + ), + ParameterRule( + name="top_p", + use_template="top_p", + label=I18nObject(en_US="Top P", zh_Hans="Top P"), + type=ParameterType.FLOAT, + ), + ParameterRule( + name="top_k", + use_template="top_k", + label=I18nObject(en_US="Top K", zh_Hans="Top K"), + type=ParameterType.FLOAT, + ), + ParameterRule( + name="frequency_penalty", + use_template="frequency_penalty", + label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"), + type=ParameterType.FLOAT, + ), + ], ) - - return entity diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml index d9792e71ee..64a3f33133 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-coder-turbo-0919 label: en_US: qwen-coder-turbo-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml index 0b03505c45..a4c93f7047 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-coder-turbo-latest label: en_US: qwen-coder-turbo-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml index 2a6c040853..ff68faed80 
100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-coder-turbo label: en_US: qwen-coder-turbo diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml index bad7f4f472..c3dbb3616f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml @@ -1,4 +1,4 @@ -# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6 +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-long label: en_US: qwen-long diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml index c14aee1e1e..42fe1f6862 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus-0816 label: en_US: qwen-math-plus-0816 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml index 9d74eeca3e..9b6567b8cd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus-0919 label: en_US: qwen-math-plus-0919 diff --git 
a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml index b8601a969a..b2a2393b36 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus-latest label: en_US: qwen-math-plus-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml index 4a948be597..63f4b7ff0a 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-plus label: en_US: qwen-math-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml index bffe324a96..4da90eec3e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-turbo-0919 label: en_US: qwen-math-turbo-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml index 0747e96614..d29f8851dd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml @@ -1,3 +1,4 @@ +# for more 
details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-turbo-latest label: en_US: qwen-math-turbo-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml index dffb5557ff..2a8f7f725e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-math-turbo label: en_US: qwen-math-turbo diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml index 8ae159f1bf..ef1841b517 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0107 label: en_US: qwen-max-0107 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml index 93fb37254e..a2ea5df130 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max-0403, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0403 label: en_US: qwen-max-0403 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml index a5c9d49609..a467665f11 100644 --- 
a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max-0428, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0428 label: en_US: qwen-max-0428 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml index e4a6dae637..78661eaea0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-0919 label: en_US: qwen-max-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml index 6fae8a7d38..6f4674576b 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-1201 label: en_US: qwen-max-1201 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml index 8e20968859..8b5f005473 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to 
(https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-latest label: en_US: qwen-max-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml index 9bc50c73fc..098494ff95 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max-longcontext label: en_US: qwen-max-longcontext diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml index c6a64dc507..9d0d3f8db3 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-max, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf) model: qwen-max label: en_US: qwen-max diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml index 430599300b..0b1a6f81df 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0206, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0206 label: en_US: qwen-plus-0206 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml index 
906995d2b9..7706005bb5 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0624, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0624 label: en_US: qwen-plus-0624 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml index b33e725dd0..348276fc08 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0723, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0723 label: en_US: qwen-plus-0723 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml index bb394fad81..29f125135e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0806, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0806 label: en_US: qwen-plus-0806 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml index 118e304a97..905fa1e102 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-0919, for more details +# please refer to 
(https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-0919 label: en_US: qwen-plus-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml index 761312bc38..c7a3549727 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-chat label: en_US: qwen-plus-chat diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml index 430872fb31..608f52c296 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus-latest, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus-latest label: en_US: qwen-plus-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index f3fce30209..9089e57255 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-plus, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk) model: qwen-plus label: en_US: qwen-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml index 2628d824fe..7ee0d44f2f 
100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml @@ -1,3 +1,6 @@ +# this model corresponds to qwen-turbo-0206, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) + model: qwen-turbo-0206 label: en_US: qwen-turbo-0206 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml index 8097459bf0..20a3f7eb64 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo-0624, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-0624 label: en_US: qwen-turbo-0624 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml index e43beeb195..ba73dec363 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo-0919, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-0919 label: en_US: qwen-turbo-0919 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml index c30cb7ca10..d785b7fe85 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo, for more details +# please refer to 
(https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-chat label: en_US: qwen-turbo-chat diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml index e443d6888b..fe38a4283c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo-latest, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo-latest label: en_US: qwen-turbo-latest diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index 33f05967c2..215c9ec5fc 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -1,3 +1,5 @@ +# this model corresponds to qwen-turbo, for more details +# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub) model: qwen-turbo label: en_US: qwen-turbo diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml index 63b6074d0d..d80168ffc3 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-max-0201 label: en_US: qwen-vl-max-0201 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml index fd20377002..50e10226a5 100644 --- 
a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-max-0809 label: en_US: qwen-vl-max-0809 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml index 31a9fb51bb..21b127f56c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-max label: en_US: qwen-vl-max diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml index 5f90cf48bc..03cb039d15 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-plus-0201 label: en_US: qwen-vl-plus-0201 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml index 97820c0f3a..67b2b2ebdd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-plus-0809 label: en_US: qwen-vl-plus-0809 diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml 
b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml index 6af36cd6f3..f55764c6c0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen-vl-plus label: en_US: qwen-vl-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml index 158e2c7ee1..ea157f42de 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2-math-1.5b-instruct label: en_US: qwen2-math-1.5b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml index e26a6923d1..37052a9233 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2-math-72b-instruct label: en_US: qwen2-math-72b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml index 589119b26e..e182f1c27f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to 
https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2-math-7b-instruct label: en_US: qwen2-math-7b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml index dd608fbf76..9e75ccc1f2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-0.5b-instruct label: en_US: qwen2.5-0.5b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml index 08237b3958..67c9d31243 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-1.5b-instruct label: en_US: qwen2.5-1.5b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml index 640b019703..2a38be921c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-14b-instruct label: en_US: qwen2.5-14b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml index 3a90ca7532..e6e4fbf978 100644 --- 
a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-32b-instruct label: en_US: qwen2.5-32b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml index b79755eb9b..8f250379a7 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-3b-instruct label: en_US: qwen2.5-3b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml index e9dd51a341..bb3cdd6141 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-72b-instruct label: en_US: qwen2.5-72b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml index 04f26cf5fe..fdcd3d4275 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-7b-instruct label: en_US: qwen2.5-7b-instruct diff --git 
a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml index 04f26cf5fe..fdcd3d4275 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models model: qwen2.5-7b-instruct label: en_US: qwen2.5-7b-instruct diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml index f4303c53d3..52e35d8b50 100644 --- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw model: text-embedding-v1 model_type: text-embedding model_properties: diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml index f6be3544ed..5bb6a8f424 100644 --- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw model: text-embedding-v2 model_type: text-embedding model_properties: diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml index 171a379ee2..d8af0e2b63 100644 --- 
a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml @@ -1,3 +1,4 @@ +# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw model: text-embedding-v3 model_type: text-embedding model_properties: diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py index 5783d2e383..736cd44df8 100644 --- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py @@ -4,6 +4,7 @@ from typing import Optional import dashscope import numpy as np +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import ( EmbeddingUsage, @@ -27,6 +28,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel): credentials: dict, texts: list[str], user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -35,6 +37,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ credentials_kwargs = self._to_credential_kwargs(credentials) diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml index fabe6d90e6..1a09c20fd9 100644 --- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml +++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml @@ -37,14 +37,51 @@ model_credential_schema: en_US: Model Name zh_Hans: 模型名称 
placeholder: - en_US: Enter full model name - zh_Hans: 输入模型全称 + en_US: Enter your model name + zh_Hans: 输入模型名称 credential_form_schemas: - variable: dashscope_api_key - required: true label: en_US: API Key type: secret-input + required: true placeholder: zh_Hans: 在此输入您的 API Key en_US: Enter your API Key + - variable: context_size + label: + zh_Hans: 模型上下文长度 + en_US: Model context size + required: true + type: text-input + default: '4096' + placeholder: + zh_Hans: 在此输入您的模型上下文长度 + en_US: Enter your Model context size + - variable: max_tokens + label: + zh_Hans: 最大 token 上限 + en_US: Upper bound for max tokens + default: '4096' + type: text-input + show_on: + - variable: __model_type + value: llm + - variable: function_calling_type + label: + en_US: Function calling + type: select + required: false + default: no_call + options: + - value: no_call + label: + en_US: Not Support + zh_Hans: 不支持 + - value: function_call + label: + en_US: Support + zh_Hans: 支持 + show_on: + - variable: __model_type + value: llm diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py index edd4a36d98..b6509cd26c 100644 --- a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py @@ -7,6 +7,7 @@ import numpy as np from openai import OpenAI from tokenizers import Tokenizer +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.validate import CredentialsValidateFailedError @@ -22,7 +23,14 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel): def _get_tokenizer(self) -> Tokenizer: return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer") - 
def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult: + def _invoke( + self, + model: str, + credentials: dict, + texts: list[str], + user: str | None = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, + ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -30,6 +38,7 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml similarity index 96% rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml index c308f0a322..f5386be06d 100644 --- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml @@ -1,6 +1,6 @@ model: gemini-1.5-flash-001 label: - en_US: Gemini 1.5 Flash + en_US: Gemini 1.5 Flash 001 model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml new file mode 100644 index 0000000000..97bd44f06b --- /dev/null +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml @@ -0,0 +1,37 @@ +model: gemini-1.5-flash-002 +label: + en_US: Gemini 1.5 Flash 002 +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + en_US: Top k + type: int + help: + 
en_US: Only sample from the top K options for each subsequent token. + required: false + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_output_tokens + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml similarity index 96% rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml index 744863e773..5e08f2294e 100644 --- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml @@ -1,6 +1,6 @@ model: gemini-1.5-pro-001 label: - en_US: Gemini 1.5 Pro + en_US: Gemini 1.5 Pro 001 model_type: llm features: - agent-thought diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml new file mode 100644 index 0000000000..8f327ea2f3 --- /dev/null +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml @@ -0,0 +1,37 @@ +model: gemini-1.5-pro-002 +label: + en_US: Gemini 1.5 Pro 002 +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + en_US: Top k + type: int + help: + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_output_tokens + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml new file mode 100644 index 0000000000..0f5eb34c0c --- /dev/null +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml @@ -0,0 +1,37 @@ +model: gemini-flash-experimental +label: + en_US: Gemini Flash Experimental +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + en_US: Top k + type: int + help: + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_output_tokens + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml new file mode 100644 index 0000000000..fa31cabb85 --- /dev/null +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml @@ -0,0 +1,37 @@ +model: gemini-pro-experimental +label: + en_US: Gemini Pro Experimental +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 1048576 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + en_US: Top k + type: int + help: + en_US: Only sample from the top K options for each subsequent token. 
+ required: false + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_output_tokens + use_template: max_tokens + required: true + default: 8192 + min: 1 + max: 8192 +pricing: + input: '0.00' + output: '0.00' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py index da69b7cdf3..1dd785d545 100644 --- a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py +++ b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py @@ -2,6 +2,7 @@ import base64 import io import json import logging +import time from collections.abc import Generator from typing import Optional, Union, cast @@ -20,7 +21,6 @@ from google.api_core import exceptions from google.cloud import aiplatform from google.oauth2 import service_account from PIL import Image -from vertexai.generative_models import HarmBlockThreshold, HarmCategory from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.message_entities import ( @@ -34,6 +34,7 @@ from core.model_runtime.entities.message_entities import ( ToolPromptMessage, UserPromptMessage, ) +from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.errors.invoke import ( InvokeAuthorizationError, InvokeBadRequestError, @@ -503,20 +504,12 @@ class VertexAiLargeLanguageModel(LargeLanguageModel): else: history.append(content) - safety_settings = { - HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, - HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, - } - google_model = glm.GenerativeModel(model_name=model, system_instruction=system_instruction) 
response = google_model.generate_content( contents=history, generation_config=glm.GenerationConfig(**config_kwargs), stream=stream, - safety_settings=safety_settings, tools=self._convert_tools_to_glm_tool(tools) if tools else None, ) diff --git a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py index 519373a7f3..fce9544df0 100644 --- a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py @@ -9,6 +9,7 @@ from google.cloud import aiplatform from google.oauth2 import service_account from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import ( AIModelEntity, @@ -30,7 +31,12 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -38,6 +44,8 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel): :param model: model name :param credentials: model credentials :param texts: texts to embed + :param user: unique user id + :param input_type: input type :return: embeddings result """ service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"])) diff --git a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py index 
9cba2cb879..0dd4037c95 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py @@ -2,6 +2,7 @@ import time from decimal import Decimal from typing import Optional +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import ( AIModelEntity, @@ -41,7 +42,12 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -50,6 +56,7 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ if ArkClientV3.is_legacy(credentials): diff --git a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py index 4d6f6dccd0..c21d0c0552 100644 --- a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py @@ -7,6 +7,7 @@ from typing import Any, Optional import numpy as np from requests import Response, post +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.invoke import InvokeError @@ -70,7 +71,12 @@ class 
WenxinTextEmbeddingModel(TextEmbeddingModel): return WenxinTextEmbedding(api_key, secret_key) def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -79,6 +85,7 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ diff --git a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py index 8043af1d6c..1627239132 100644 --- a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py @@ -3,6 +3,7 @@ from typing import Optional from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult @@ -25,7 +26,12 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -40,6 +46,7 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel): :param 
credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ server_url = credentials["server_url"] diff --git a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py index ee20954381..14a529dddf 100644 --- a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py @@ -1,6 +1,7 @@ import time from typing import Optional +from core.embedding.embedding_constant import EmbeddingInputType from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.errors.validate import CredentialsValidateFailedError @@ -15,7 +16,12 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel): """ def _invoke( - self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None + self, + model: str, + credentials: dict, + texts: list[str], + user: Optional[str] = None, + input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT, ) -> TextEmbeddingResult: """ Invoke text embedding model @@ -24,6 +30,7 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel): :param credentials: model credentials :param texts: texts to embed :param user: unique user id + :param input_type: input type :return: embeddings result """ credentials_kwargs = self._to_credential_kwargs(credentials) diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py index 612542dab1..6dcd98dcfd 100644 --- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py +++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py @@ -40,19 +40,8 @@ class 
AnalyticdbConfig(BaseModel): class AnalyticdbVector(BaseVector): - _instance = None - _init = False - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - def __init__(self, collection_name: str, config: AnalyticdbConfig): - # collection_name must be updated every time self._collection_name = collection_name.lower() - if AnalyticdbVector._init: - return try: from alibabacloud_gpdb20160503.client import Client from alibabacloud_tea_openapi import models as open_api_models @@ -62,7 +51,6 @@ class AnalyticdbVector(BaseVector): self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params()) self._client = Client(self._client_config) self._initialize() - AnalyticdbVector._init = True def _initialize(self) -> None: cache_key = f"vector_indexing_{self.config.instance_id}" @@ -257,11 +245,14 @@ class AnalyticdbVector(BaseVector): documents = [] for match in response.body.matches.match: if match.score > score_threshold: + metadata = json.loads(match.metadata.get("metadata_")) + metadata["score"] = match.score doc = Document( page_content=match.metadata.get("page_content"), - metadata=json.loads(match.metadata.get("metadata_")), + metadata=metadata, ) documents.append(doc) + documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True) return documents def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: @@ -286,12 +277,14 @@ class AnalyticdbVector(BaseVector): for match in response.body.matches.match: if match.score > score_threshold: metadata = json.loads(match.metadata.get("metadata_")) + metadata["score"] = match.score doc = Document( page_content=match.metadata.get("page_content"), vector=match.metadata.get("vector"), metadata=metadata, ) documents.append(doc) + documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True) return documents def delete(self) -> None: diff --git 
a/api/core/rag/datasource/vdb/vector_base.py b/api/core/rag/datasource/vdb/vector_base.py index 1a0dc7f48b..22e191340d 100644 --- a/api/core/rag/datasource/vdb/vector_base.py +++ b/api/core/rag/datasource/vdb/vector_base.py @@ -45,6 +45,7 @@ class BaseVector(ABC): def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: raise NotImplementedError + @abstractmethod def delete(self) -> None: raise NotImplementedError diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index fe7eaa32e6..0ffc89b214 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -124,7 +124,7 @@ class ExtractProcessor: extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url) elif file_extension == ".xml": extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url) - elif file_extension == "epub": + elif file_extension == ".epub": extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url) else: # txt @@ -146,7 +146,7 @@ class ExtractProcessor: extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by) elif file_extension == ".csv": extractor = CSVExtractor(file_path, autodetect_encoding=True) - elif file_extension == "epub": + elif file_extension == ".epub": extractor = UnstructuredEpubExtractor(file_path) else: # txt diff --git a/api/core/tools/provider/builtin/comfyui/comfyui.yaml b/api/core/tools/provider/builtin/comfyui/comfyui.yaml index 066fd85308..3891eebf3a 100644 --- a/api/core/tools/provider/builtin/comfyui/comfyui.yaml +++ b/api/core/tools/provider/builtin/comfyui/comfyui.yaml @@ -39,4 +39,4 @@ credentials_for_provider: en_US: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors zh_Hans: ComfyUI服务器的模型名称, 比如 xxx.safetensors pt_BR: The checkpoint name of the ComfyUI server, e.g. 
xxx.safetensors - url: https://docs.dify.ai/tutorials/tool-configuration/comfyui + url: https://github.com/comfyanonymous/ComfyUI#installing diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml index 58ad6d8694..589bc3433d 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml +++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml @@ -2,14 +2,14 @@ identity: name: jina_reader author: Dify label: - en_US: JinaReader - zh_Hans: JinaReader - pt_BR: JinaReader + en_US: Fetch Single Page + zh_Hans: 获取单页面 + pt_BR: Fetch Single Page description: human: - en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost. - zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。 - pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo. + en_US: Fetch the target URL (can be a PDF) and convert it into a LLM-friendly markdown. + zh_Hans: 获取目标网址(可以是 PDF),并将其转换为适合大模型处理的 Markdown 格式。 + pt_BR: Busque a URL de destino (que pode ser um PDF) e converta em um Markdown LLM-friendly. llm: A tool for scraping webpages. Input should be a URL. 
parameters: - name: url @@ -17,13 +17,13 @@ parameters: required: true label: en_US: URL - zh_Hans: 网页链接 + zh_Hans: 网址 pt_BR: URL human_description: - en_US: used for linking to webpages - zh_Hans: 用于链接到网页 - pt_BR: used for linking to webpages - llm_description: url for scraping + en_US: Web link + zh_Hans: 网页链接 + pt_BR: URL da web + llm_description: url para scraping form: llm - name: request_params type: string @@ -31,14 +31,14 @@ parameters: label: en_US: Request params zh_Hans: 请求参数 - pt_BR: Request params + pt_BR: Parâmetros de solicitação human_description: en_US: | request parameters, format: {"key1": "value1", "key2": "value2"} zh_Hans: | 请求参数,格式:{"key1": "value1", "key2": "value2"} pt_BR: | - request parameters, format: {"key1": "value1", "key2": "value2"} + parâmetros de solicitação, formato: {"key1": "value1", "key2": "value2"} llm_description: request parameters form: llm - name: target_selector @@ -51,7 +51,7 @@ parameters: human_description: en_US: css selector for scraping specific elements zh_Hans: css 选择器用于抓取特定元素 - pt_BR: css selector for scraping specific elements + pt_BR: css selector para scraping de elementos específicos llm_description: css selector of the target element to scrape form: form - name: wait_for_selector @@ -64,7 +64,7 @@ parameters: human_description: en_US: css selector for waiting for specific elements zh_Hans: css 选择器用于等待特定元素 - pt_BR: css selector for waiting for specific elements + pt_BR: css selector para aguardar elementos específicos llm_description: css selector of the target element to wait for form: form - name: image_caption @@ -77,8 +77,8 @@ parameters: pt_BR: Legenda da imagem human_description: en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." 
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。" - pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." + zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。" + pt_BR: "Adiciona legendas a todas as imagens na URL especificada, adicionando 'Imagem [idx]: [legenda]' como uma tag alt para aquelas que não têm uma. Isso permite que os modelos LLM inferiores interajam com as imagens em atividades como raciocínio e resumo." llm_description: Captions all images at the specified URL form: form - name: gather_all_links_at_the_end @@ -91,8 +91,8 @@ parameters: pt_BR: Coletar todos os links ao final human_description: en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. - zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。 - pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. + zh_Hans: 末尾将添加“按钮和链接”部分,方便下游模型或网络代理做页面导航或执行进一步操作。 + pt_BR: Uma seção "Botões & Links" será criada no final. Isso ajuda os LLMs ou agentes da web a jusante a navegar pela página ou executar ações adicionais. llm_description: Gather all links at the end form: form - name: gather_all_images_at_the_end @@ -105,8 +105,8 @@ parameters: pt_BR: Coletar todas as imagens ao final human_description: en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. - zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。 - pt_BR: An "Images" section will be created at the end.
This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. + zh_Hans: 末尾会新增“图片”部分,方便下游模型全面了解页面的视觉内容,提升推理效果。 + pt_BR: Uma seção "Imagens" será criada no final. Isso dá aos LLMs a jusante uma visão geral de todos os elementos visuais da página, o que pode melhorar o raciocínio. llm_description: Gather all images at the end form: form - name: proxy_server diff --git a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml index 2bc70e1be1..e58c639e56 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml +++ b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml @@ -2,13 +2,14 @@ identity: name: jina_search author: Dify label: - en_US: JinaSearch - zh_Hans: JinaSearch - pt_BR: JinaSearch + en_US: Search the web + zh_Hans: 联网搜索 + pt_BR: Search the web description: human: - en_US: Search on the web and get the top 5 results. Useful for grounding using information from the web. - zh_Hans: 在网络上搜索返回前 5 个结果。 + en_US: Search on the public web of a given query and return the top results as LLM-friendly markdown. + zh_Hans: 针对给定的查询在互联网上进行搜索,并以适合大模型处理的 Markdown 格式返回最相关的结果。 + pt_BR: Procurar na web pública de uma consulta fornecida e retornar os melhores resultados como markdown para LLMs. llm: A tool for searching results on the web for grounding. Input should be a simple question.
parameters: - name: query @@ -16,11 +17,13 @@ parameters: required: true label: en_US: Question (Query) - zh_Hans: 信息查询 + zh_Hans: 查询 + pt_BR: Pergunta (Consulta) human_description: en_US: used to find information on the web zh_Hans: 在网络上搜索信息 - llm_description: simple question to ask on the web + pt_BR: Usado para encontrar informações na web + llm_description: Pergunta simples para fazer na web form: llm - name: image_caption type: boolean @@ -32,7 +35,7 @@ parameters: pt_BR: Legenda da imagem human_description: en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." - zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。" + zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。" pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing." llm_description: Captions all images at the specified URL form: form @@ -46,8 +49,8 @@ parameters: pt_BR: Coletar todos os links ao final human_description: en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. - zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。 - pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions. + zh_Hans: 末尾将添加“按钮和链接”部分,汇总页面上的所有链接。方便下游模型或网络代理做页面导航或执行进一步操作。 + pt_BR: Um "Botão & Links" seção será criada no final. Isso ajuda os LLMs ou agentes da web navegando pela página ou executar ações adicionais. 
llm_description: Gather all links at the end form: form - name: gather_all_images_at_the_end @@ -60,8 +63,8 @@ parameters: pt_BR: Coletar todas as imagens ao final human_description: en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. - zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。 - pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning. + zh_Hans: 末尾会新增“图片”部分,汇总页面上的所有图片。方便下游模型概览页面的视觉内容,提升推理效果。 + pt_BR: Um "Imagens" seção será criada no final. Isso fornece uma visão geral de todas as imagens na página para os LLMs, que pode melhorar a razão. llm_description: Gather all images at the end form: form - name: proxy_server @@ -74,7 +77,7 @@ parameters: human_description: en_US: Use proxy to access URLs zh_Hans: 利用代理访问 URL - pt_BR: Use proxy to access URLs + pt_BR: Usar proxy para acessar URLs llm_description: Use proxy to access URLs form: form - name: no_cache @@ -83,7 +86,7 @@ parameters: default: false label: en_US: Bypass the Cache - zh_Hans: 绕过缓存 + zh_Hans: 是否绕过缓存 pt_BR: Ignorar o cache human_description: en_US: Bypass the Cache diff --git a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml index 62a5c7e7ba..74885cdf9a 100644 --- a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml +++ b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml @@ -2,11 +2,14 @@ identity: name: jina_tokenizer author: hjlarry label: - en_US: JinaTokenizer + en_US: Segment + zh_Hans: 切分器 + pt_BR: Segment description: human: - en_US: Free API to tokenize text and segment long text into chunks. - zh_Hans: 免费的API可以将文本tokenize,也可以将长文本分割成多个部分。 + en_US: Split long text into chunks and do tokenization. 
+ zh_Hans: 将长文本拆分成小段落,并做分词处理。 + pt_BR: Dividir o texto longo em pedaços e fazer tokenização. llm: Free API to tokenize text and segment long text into chunks. parameters: - name: content @@ -15,6 +18,7 @@ parameters: label: en_US: Content zh_Hans: 内容 + pt_BR: Conteúdo llm_description: the content which need to tokenize or segment form: llm - name: return_tokens @@ -23,18 +27,22 @@ parameters: label: en_US: Return the tokens zh_Hans: 是否返回tokens + pt_BR: Retornar os tokens human_description: en_US: Return the tokens and their corresponding ids in the response. zh_Hans: 返回tokens及其对应的ids。 + pt_BR: Retornar os tokens e seus respectivos ids na resposta. form: form - name: return_chunks type: boolean label: en_US: Return the chunks zh_Hans: 是否分块 + pt_BR: Retornar os chunks human_description: en_US: Chunking the input into semantically meaningful segments while handling a wide variety of text types and edge cases based on common structural cues. - zh_Hans: 将输入分块为具有语义意义的片段,同时根据常见的结构线索处理各种文本类型和边缘情况。 + zh_Hans: 将输入文本分块为语义有意义的片段,同时基于常见的结构线索处理各种文本类型和特殊情况。 + pt_BR: Dividir o texto de entrada em segmentos semanticamente significativos, enquanto lida com uma ampla variedade de tipos de texto e casos de borda com base em pistas estruturais comuns. 
form: form - name: tokenizer type: select diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.py b/api/core/tools/provider/builtin/stepfun/stepfun.py index b24f730c95..239db85b11 100644 --- a/api/core/tools/provider/builtin/stepfun/stepfun.py +++ b/api/core/tools/provider/builtin/stepfun/stepfun.py @@ -16,7 +16,7 @@ class StepfunProvider(BuiltinToolProviderController): user_id="", tool_parameters={ "prompt": "cute girl, blue eyes, white hair, anime style", - "size": "1024x1024", + "size": "256x256", "n": 1, }, ) diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.yaml b/api/core/tools/provider/builtin/stepfun/stepfun.yaml index 1f841ec369..e8139a4d7d 100644 --- a/api/core/tools/provider/builtin/stepfun/stepfun.yaml +++ b/api/core/tools/provider/builtin/stepfun/stepfun.yaml @@ -4,11 +4,9 @@ identity: label: en_US: Image-1X zh_Hans: 阶跃星辰绘画 - pt_BR: Image-1X description: en_US: Image-1X zh_Hans: 阶跃星辰绘画 - pt_BR: Image-1X icon: icon.png tags: - image @@ -20,27 +18,16 @@ credentials_for_provider: label: en_US: Stepfun API key zh_Hans: 阶跃星辰API key - pt_BR: Stepfun API key - help: - en_US: Please input your stepfun API key - zh_Hans: 请输入你的阶跃星辰 API key - pt_BR: Please input your stepfun API key placeholder: - en_US: Please input your stepfun API key + en_US: Please input your Stepfun API key zh_Hans: 请输入你的阶跃星辰 API key - pt_BR: Please input your stepfun API key + url: https://platform.stepfun.com/interface-key stepfun_base_url: type: text-input required: false label: en_US: Stepfun base URL zh_Hans: 阶跃星辰 base URL - pt_BR: Stepfun base URL - help: - en_US: Please input your Stepfun base URL - zh_Hans: 请输入你的阶跃星辰 base URL - pt_BR: Please input your Stepfun base URL placeholder: en_US: Please input your Stepfun base URL zh_Hans: 请输入你的阶跃星辰 base URL - pt_BR: Please input your Stepfun base URL diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.py b/api/core/tools/provider/builtin/stepfun/tools/image.py index 0b92b122bf..eb55dae518 100644 --- 
a/api/core/tools/provider/builtin/stepfun/tools/image.py +++ b/api/core/tools/provider/builtin/stepfun/tools/image.py @@ -1,4 +1,3 @@ -import random from typing import Any, Union from openai import OpenAI @@ -19,7 +18,7 @@ class StepfunTool(BuiltinTool): """ invoke tools """ - base_url = self.runtime.credentials.get("stepfun_base_url", "https://api.stepfun.com") + base_url = self.runtime.credentials.get("stepfun_base_url") or "https://api.stepfun.com" base_url = str(URL(base_url) / "v1") client = OpenAI( @@ -28,9 +27,7 @@ class StepfunTool(BuiltinTool): ) extra_body = {} - model = tool_parameters.get("model", "step-1x-medium") - if not model: - return self.create_text_message("Please input model name") + model = "step-1x-medium" # prompt prompt = tool_parameters.get("prompt", "") if not prompt: @@ -67,9 +64,3 @@ class StepfunTool(BuiltinTool): ) ) return result - - @staticmethod - def _generate_random_id(length=8): - characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - random_id = "".join(random.choices(characters, k=length)) - return random_id diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.yaml b/api/core/tools/provider/builtin/stepfun/tools/image.yaml index dcc5bd2db2..8d7c9b6586 100644 --- a/api/core/tools/provider/builtin/stepfun/tools/image.yaml +++ b/api/core/tools/provider/builtin/stepfun/tools/image.yaml @@ -29,35 +29,6 @@ parameters: pt_BR: Image prompt, you can check the official documentation of step-1x llm_description: Image prompt of step-1x you should describe the image you want to generate as a list of words as possible as detailed form: llm - - name: model - type: select - required: false - human_description: - en_US: used for selecting the model name - zh_Hans: 用于选择模型的名字 - pt_BR: used for selecting the model name - label: - en_US: Model Name - zh_Hans: 模型名字 - pt_BR: Model Name - form: form - options: - - value: step-1x-turbo - label: - en_US: turbo - zh_Hans: turbo - pt_BR: turbo - - value: 
step-1x-medium - label: - en_US: medium - zh_Hans: medium - pt_BR: medium - - value: step-1x-large - label: - en_US: large - zh_Hans: large - pt_BR: large - default: step-1x-medium - name: size type: select required: false diff --git a/api/core/tools/provider/builtin/tavily/tavily.yaml b/api/core/tools/provider/builtin/tavily/tavily.yaml index 7b25a81848..95820f4d18 100644 --- a/api/core/tools/provider/builtin/tavily/tavily.yaml +++ b/api/core/tools/provider/builtin/tavily/tavily.yaml @@ -28,4 +28,4 @@ credentials_for_provider: en_US: Get your Tavily API key from Tavily zh_Hans: 从 TavilyApi 获取您的 Tavily API key pt_BR: Get your Tavily API key from Tavily - url: https://docs.tavily.com/docs/tavily-api/introduction + url: https://docs.tavily.com/docs/welcome diff --git a/api/core/tools/provider/builtin/youtube/youtube.py b/api/core/tools/provider/builtin/youtube/youtube.py index aad876491c..07e430bcbf 100644 --- a/api/core/tools/provider/builtin/youtube/youtube.py +++ b/api/core/tools/provider/builtin/youtube/youtube.py @@ -13,7 +13,7 @@ class YahooFinanceProvider(BuiltinToolProviderController): ).invoke( user_id="", tool_parameters={ - "channel": "TOKYO GIRLS COLLECTION", + "channel": "UC2JZCsZSOudXA08cMMRCL9g", "start_date": "2020-01-01", "end_date": "2024-12-31", }, diff --git a/api/core/tools/provider/tool_provider.py b/api/core/tools/provider/tool_provider.py index 05c88b904e..321b212014 100644 --- a/api/core/tools/provider/tool_provider.py +++ b/api/core/tools/provider/tool_provider.py @@ -153,6 +153,9 @@ class ToolProviderController(BaseModel, ABC): # check type credential_schema = credentials_need_to_validate[credential_name] + if not credential_schema.required and credentials[credential_name] is None: + continue + if credential_schema.type in { ToolProviderCredentials.CredentialsType.SECRET_INPUT, ToolProviderCredentials.CredentialsType.TEXT_INPUT, diff --git a/api/poetry.lock b/api/poetry.lock index 184cdb9e81..bce21fb547 100644 --- a/api/poetry.lock +++ 
b/api/poetry.lock @@ -2333,13 +2333,13 @@ develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", [[package]] name = "elasticsearch" -version = "8.14.0" +version = "8.15.1" description = "Python client for Elasticsearch" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "elasticsearch-8.14.0-py3-none-any.whl", hash = "sha256:cef8ef70a81af027f3da74a4f7d9296b390c636903088439087b8262a468c130"}, - {file = "elasticsearch-8.14.0.tar.gz", hash = "sha256:aa2490029dd96f4015b333c1827aa21fd6c0a4d223b00dfb0fe933b8d09a511b"}, + {file = "elasticsearch-8.15.1-py3-none-any.whl", hash = "sha256:02a0476e98768a30d7926335fc0d305c04fdb928eea1354c6e6040d8c2814569"}, + {file = "elasticsearch-8.15.1.tar.gz", hash = "sha256:40c0d312f8adf8bdc81795bc16a0b546ddf544cb1f90e829a244e4780c4dbfd8"}, ] [package.dependencies] @@ -2347,7 +2347,10 @@ elastic-transport = ">=8.13,<9" [package.extras] async = ["aiohttp (>=3,<4)"] +dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "nox", "numpy", "orjson", "pandas", "pyarrow", "pytest", "pytest-asyncio", "pytest-cov", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "twine", "unasync"] +docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"] orjson = ["orjson (>=3)"] +pyarrow = ["pyarrow (>=1)"] requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"] vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"] @@ -10498,4 +10501,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "17c4108d92c415d987f8b437ea3e0484c5601a05bfe175339a8546c93c159bc5" +content-hash = "69b42bb1ff033f14e199fee8335356275099421d72bbd7037b7a991ea65cae08" diff --git a/api/pyproject.toml b/api/pyproject.toml index 9e38c09456..f004865d5f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -253,7 +253,7 @@ alibabacloud_gpdb20160503 = "~3.8.0" alibabacloud_tea_openapi = "~0.3.9" chromadb = "0.5.1" 
clickhouse-connect = "~0.7.16" -elasticsearch = "8.14.0" +elasticsearch = "~8.15.1" oracledb = "~2.2.1" pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] } pgvector = "0.2.5" diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py new file mode 100644 index 0000000000..7bf723b3a9 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py @@ -0,0 +1,54 @@ +import os + +import pytest + +from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.fireworks.text_embedding.text_embedding import FireworksTextEmbeddingModel +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + + +@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True) +def test_validate_credentials(setup_openai_mock): + model = FireworksTextEmbeddingModel() + + with pytest.raises(CredentialsValidateFailedError): + model.validate_credentials( + model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": "invalid_key"} + ) + + model.validate_credentials( + model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")} + ) + + +@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True) +def test_invoke_model(setup_openai_mock): + model = FireworksTextEmbeddingModel() + + result = model.invoke( + model="nomic-ai/nomic-embed-text-v1.5", + credentials={ + "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"), + }, + texts=["hello", "world", " ".join(["long_text"] * 100), " ".join(["another_long_text"] * 100)], + user="foo", + ) + + assert isinstance(result, TextEmbeddingResult) + assert len(result.embeddings) == 4 + assert result.usage.total_tokens == 2 + + +def 
test_get_num_tokens(): + model = FireworksTextEmbeddingModel() + + num_tokens = model.get_num_tokens( + model="nomic-ai/nomic-embed-text-v1.5", + credentials={ + "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"), + }, + texts=["hello", "world"], + ) + + assert num_tokens == 2 diff --git a/docker/.env.example b/docker/.env.example index 7eaaceb928..d43c3edc7e 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -346,7 +346,7 @@ VOLCENGINE_TOS_REGION=your-region # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`. +# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`. VECTOR_STORE=weaviate # The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -385,13 +385,30 @@ MYSCALE_PASSWORD= MYSCALE_DATABASE=dify MYSCALE_FTS_PARAMS= -# pgvector configurations, only available when VECTOR_STORE is `pgvecto-rs or pgvector` +# pgvector configurations, only available when VECTOR_STORE is `pgvector` PGVECTOR_HOST=pgvector PGVECTOR_PORT=5432 PGVECTOR_USER=postgres PGVECTOR_PASSWORD=difyai123456 PGVECTOR_DATABASE=dify +# pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs` +PGVECTO_RS_HOST=pgvecto-rs +PGVECTO_RS_PORT=5432 +PGVECTO_RS_USER=postgres +PGVECTO_RS_PASSWORD=difyai123456 +PGVECTO_RS_DATABASE=dify + +# analyticdb configurations, only available when VECTOR_STORE is `analyticdb` +ANALYTICDB_KEY_ID=your-ak +ANALYTICDB_KEY_SECRET=your-sk +ANALYTICDB_REGION_ID=cn-hangzhou +ANALYTICDB_INSTANCE_ID=gp-ab123456 +ANALYTICDB_ACCOUNT=testaccount +ANALYTICDB_PASSWORD=testpassword +ANALYTICDB_NAMESPACE=dify +ANALYTICDB_NAMESPACE_PASSWORD=difypassword + # TiDB vector configurations, only available when VECTOR_STORE is `tidb` TIDB_VECTOR_HOST=tidb TIDB_VECTOR_PORT=4000 @@ -568,6 +585,10 @@ WORKFLOW_MAX_EXECUTION_STEPS=500 WORKFLOW_MAX_EXECUTION_TIME=1200 WORKFLOW_CALL_MAX_DEPTH=5 +# HTTP request node in workflow configuration +HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760 +HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576 + # SSRF Proxy server HTTP URL SSRF_PROXY_HTTP_URL=http://ssrf_proxy:3128 # SSRF Proxy server HTTPS URL diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 16bef279bc..95e271a0e9 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -207,6 +207,8 @@ x-shared-env: &shared-api-worker-env WORKFLOW_CALL_MAX_DEPTH: ${WORKFLOW_MAX_EXECUTION_TIME:-5} SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128} SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128} + HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760} + HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576} services: # API 
service @@ -628,7 +630,7 @@ services: # https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html # https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-prod-prerequisites elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.14.3 + image: docker.elastic.co/elasticsearch/elasticsearch:8.15.1 container_name: elasticsearch profiles: - elasticsearch @@ -655,7 +657,7 @@ services: # https://www.elastic.co/guide/en/kibana/current/docker.html # https://www.elastic.co/guide/en/kibana/current/settings.html kibana: - image: docker.elastic.co/kibana/kibana:8.14.3 + image: docker.elastic.co/kibana/kibana:8.15.1 container_name: kibana profiles: - elasticsearch diff --git a/sdks/python-client/dify_client/client.py b/sdks/python-client/dify_client/client.py index 2be079bdf3..5e42507a42 100644 --- a/sdks/python-client/dify_client/client.py +++ b/sdks/python-client/dify_client/client.py @@ -1,103 +1,80 @@ import json + import requests class DifyClient: - def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1'): + def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1"): self.api_key = api_key self.base_url = base_url def _send_request(self, method, endpoint, json=None, params=None, stream=False): - headers = { - "Authorization": f"Bearer {self.api_key}", - "Content-Type": "application/json" - } + headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"} url = f"{self.base_url}{endpoint}" response = requests.request(method, url, json=json, params=params, headers=headers, stream=stream) return response - def _send_request_with_files(self, method, endpoint, data, files): - headers = { - "Authorization": f"Bearer {self.api_key}" - } + headers = {"Authorization": f"Bearer {self.api_key}"} url = f"{self.base_url}{endpoint}" response = requests.request(method, url, data=data, headers=headers, files=files) return response - + def message_feedback(self, message_id, 
rating, user): - data = { - "rating": rating, - "user": user - } + data = {"rating": rating, "user": user} return self._send_request("POST", f"/messages/{message_id}/feedbacks", data) - + def get_application_parameters(self, user): params = {"user": user} return self._send_request("GET", "/parameters", params=params) - + def file_upload(self, user, files): - data = { - "user": user - } + data = {"user": user} return self._send_request_with_files("POST", "/files/upload", data=data, files=files) - def text_to_audio(self, text:str, user:str, streaming:bool=False): - data = { - "text": text, - "user": user, - "streaming": streaming - } + def text_to_audio(self, text: str, user: str, streaming: bool = False): + data = {"text": text, "user": user, "streaming": streaming} return self._send_request("POST", "/text-to-audio", data=data) - - def get_meta(self,user): - params = { "user": user} - return self._send_request("GET", f"/meta", params=params) + + def get_meta(self, user): + params = {"user": user} + return self._send_request("GET", "/meta", params=params) class CompletionClient(DifyClient): def create_completion_message(self, inputs, response_mode, user, files=None): - data = { - "inputs": inputs, - "response_mode": response_mode, - "user": user, - "files": files - } - return self._send_request("POST", "/completion-messages", data, - stream=True if response_mode == "streaming" else False) + data = {"inputs": inputs, "response_mode": response_mode, "user": user, "files": files} + return self._send_request( + "POST", "/completion-messages", data, stream=True if response_mode == "streaming" else False + ) class ChatClient(DifyClient): def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None, files=None): - data = { - "inputs": inputs, - "query": query, - "user": user, - "response_mode": response_mode, - "files": files - } + data = {"inputs": inputs, "query": query, "user": user, "response_mode": response_mode, "files": files} if 
conversation_id: data["conversation_id"] = conversation_id - return self._send_request("POST", "/chat-messages", data, - stream=True if response_mode == "streaming" else False) - - def get_suggested(self, message_id, user:str): + return self._send_request( + "POST", "/chat-messages", data, stream=True if response_mode == "streaming" else False + ) + + def get_suggested(self, message_id, user: str): params = {"user": user} return self._send_request("GET", f"/messages/{message_id}/suggested", params=params) - + def stop_message(self, task_id, user): data = {"user": user} - return self._send_request("POST", f"/chat-messages/{task_id}/stop", data) + return self._send_request("POST", f"/chat-messages/{task_id}/stop", data) def get_conversations(self, user, last_id=None, limit=None, pinned=None): params = {"user": user, "last_id": last_id, "limit": limit, "pinned": pinned} return self._send_request("GET", "/conversations", params=params) - + def get_conversation_messages(self, user, conversation_id=None, first_id=None, limit=None): params = {"user": user} @@ -109,15 +86,15 @@ class ChatClient(DifyClient): params["limit"] = limit return self._send_request("GET", "/messages", params=params) - - def rename_conversation(self, conversation_id, name,auto_generate:bool, user:str): - data = {"name": name, "auto_generate": auto_generate,"user": user} + + def rename_conversation(self, conversation_id, name, auto_generate: bool, user: str): + data = {"name": name, "auto_generate": auto_generate, "user": user} return self._send_request("POST", f"/conversations/{conversation_id}/name", data) def delete_conversation(self, conversation_id, user): data = {"user": user} return self._send_request("DELETE", f"/conversations/{conversation_id}", data) - + def audio_to_text(self, audio_file, user): data = {"user": user} files = {"audio_file": audio_file} @@ -125,10 +102,10 @@ class ChatClient(DifyClient): class WorkflowClient(DifyClient): - def run(self, inputs:dict, 
response_mode:str="streaming", user:str="abc-123"): + def run(self, inputs: dict, response_mode: str = "streaming", user: str = "abc-123"): data = {"inputs": inputs, "response_mode": response_mode, "user": user} return self._send_request("POST", "/workflows/run", data) - + def stop(self, task_id, user): data = {"user": user} return self._send_request("POST", f"/workflows/tasks/{task_id}/stop", data) @@ -137,10 +114,8 @@ class WorkflowClient(DifyClient): return self._send_request("GET", f"/workflows/run/{workflow_run_id}") - class KnowledgeBaseClient(DifyClient): - - def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1', dataset_id: str = None): + def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1", dataset_id: str = None): """ Construct a KnowledgeBaseClient object. @@ -150,10 +125,7 @@ class KnowledgeBaseClient(DifyClient): dataset_id (str, optional): ID of the dataset. Defaults to None. You don't need this if you just want to create a new dataset. or list datasets. otherwise you need to set this. 
""" - super().__init__( - api_key=api_key, - base_url=base_url - ) + super().__init__(api_key=api_key, base_url=base_url) self.dataset_id = dataset_id def _get_dataset_id(self): @@ -162,10 +134,10 @@ class KnowledgeBaseClient(DifyClient): return self.dataset_id def create_dataset(self, name: str, **kwargs): - return self._send_request('POST', '/datasets', {'name': name}, **kwargs) + return self._send_request("POST", "/datasets", {"name": name}, **kwargs) def list_datasets(self, page: int = 1, page_size: int = 20, **kwargs): - return self._send_request('GET', f'/datasets?page={page}&limit={page_size}', **kwargs) + return self._send_request("GET", f"/datasets?page={page}&limit={page_size}", **kwargs) def create_document_by_text(self, name, text, extra_params: dict = None, **kwargs): """ @@ -193,14 +165,7 @@ class KnowledgeBaseClient(DifyClient): } :return: Response from the API """ - data = { - 'indexing_technique': 'high_quality', - 'process_rule': { - 'mode': 'automatic' - }, - 'name': name, - 'text': text - } + data = {"indexing_technique": "high_quality", "process_rule": {"mode": "automatic"}, "name": name, "text": text} if extra_params is not None and isinstance(extra_params, dict): data.update(extra_params) url = f"/datasets/{self._get_dataset_id()}/document/create_by_text" @@ -233,10 +198,7 @@ class KnowledgeBaseClient(DifyClient): } :return: Response from the API """ - data = { - 'name': name, - 'text': text - } + data = {"name": name, "text": text} if extra_params is not None and isinstance(extra_params, dict): data.update(extra_params) url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/update_by_text" @@ -269,16 +231,11 @@ class KnowledgeBaseClient(DifyClient): :return: Response from the API """ files = {"file": open(file_path, "rb")} - data = { - 'process_rule': { - 'mode': 'automatic' - }, - 'indexing_technique': 'high_quality' - } + data = {"process_rule": {"mode": "automatic"}, "indexing_technique": "high_quality"} if extra_params is not 
None and isinstance(extra_params, dict): data.update(extra_params) if original_document_id is not None: - data['original_document_id'] = original_document_id + data["original_document_id"] = original_document_id url = f"/datasets/{self._get_dataset_id()}/document/create_by_file" return self._send_request_with_files("POST", url, {"data": json.dumps(data)}, files) @@ -352,11 +309,11 @@ class KnowledgeBaseClient(DifyClient): """ params = {} if page is not None: - params['page'] = page + params["page"] = page if page_size is not None: - params['limit'] = page_size + params["limit"] = page_size if keyword is not None: - params['keyword'] = keyword + params["keyword"] = keyword url = f"/datasets/{self._get_dataset_id()}/documents" return self._send_request("GET", url, params=params, **kwargs) @@ -383,9 +340,9 @@ class KnowledgeBaseClient(DifyClient): url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments" params = {} if keyword is not None: - params['keyword'] = keyword + params["keyword"] = keyword if status is not None: - params['status'] = status + params["status"] = status if "params" in kwargs: params.update(kwargs["params"]) return self._send_request("GET", url, params=params, **kwargs) diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx index 90874f50ce..0f18544335 100644 --- a/web/app/activate/page.tsx +++ b/web/app/activate/page.tsx @@ -22,7 +22,7 @@ const Activate = () => {
- © {new Date().getFullYear()} Dify, Inc. All rights reserved. + © {new Date().getFullYear()} LangGenius, Inc. All rights reserved.
diff --git a/web/app/components/app/configuration/config-var/config-modal/index.tsx b/web/app/components/app/configuration/config-var/config-modal/index.tsx index f8510a5cb8..85e241a203 100644 --- a/web/app/components/app/configuration/config-var/config-modal/index.tsx +++ b/web/app/components/app/configuration/config-var/config-modal/index.tsx @@ -1,6 +1,6 @@ 'use client' import type { FC } from 'react' -import React, { useCallback, useState } from 'react' +import React, { useCallback, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import produce from 'immer' @@ -45,6 +45,12 @@ const ConfigModal: FC = ({ const { t } = useTranslation() const [tempPayload, setTempPayload] = useState(payload || getNewVarInWorkflow('') as any) const { type, label, variable, options, max_length } = tempPayload + const modalRef = useRef(null) + useEffect(() => { + // Focus the modal container when it opens: if the first input is auto-focused instead, closing the modal right away raises an error + if (isShow) + modalRef.current?.focus() + }, [isShow]) const isStringInput = type === InputVarType.textInput || type === InputVarType.paragraph const checkVariableName = useCallback((value: string, canBeEmpty?: boolean) => { @@ -175,7 +181,7 @@ const ConfigModal: FC = ({ isShow={isShow} onClose={onClose} > -
+
diff --git a/web/app/components/base/chat/chat/answer/index.tsx b/web/app/components/base/chat/chat/answer/index.tsx index 1ee098a1db..41d4f3829f 100644 --- a/web/app/components/base/chat/chat/answer/index.tsx +++ b/web/app/components/base/chat/chat/answer/index.tsx @@ -84,6 +84,19 @@ const Answer: FC = ({ getContentWidth() }, [responding]) + // Recalculate contentWidth when content changes (e.g., SVG preview/source toggle) + useEffect(() => { + if (!containerRef.current) + return + const resizeObserver = new ResizeObserver(() => { + getContentWidth() + }) + resizeObserver.observe(containerRef.current) + return () => { + resizeObserver.disconnect() + } + }, []) + return (
diff --git a/web/app/components/base/markdown.tsx b/web/app/components/base/markdown.tsx index 443ee3410c..39a399cc9f 100644 --- a/web/app/components/base/markdown.tsx +++ b/web/app/components/base/markdown.tsx @@ -116,59 +116,80 @@ const CodeBlock: CodeComponent = memo(({ inline, className, children, ...props } const match = /language-(\w+)/.exec(className || '') const language = match?.[1] const languageShowName = getCorrectCapitalizationLanguageName(language || '') - let chartData = JSON.parse(String('{"title":{"text":"ECharts error - Wrong JSON format."}}').replace(/\n$/, '')) - if (language === 'echarts') { - try { - chartData = JSON.parse(String(children).replace(/\n$/, '')) + const chartData = useMemo(() => { + if (language === 'echarts') { + try { + return JSON.parse(String(children).replace(/\n$/, '')) + } + catch (error) {} } - catch (error) { - } - } + return JSON.parse('{"title":{"text":"ECharts error - Wrong JSON format."}}') + }, [language, children]) - // Use `useMemo` to ensure that `SyntaxHighlighter` only re-renders when necessary - return useMemo(() => { - return (!inline && match) - ? ( -
-
-
{languageShowName}
-
- {language === 'mermaid' && } - -
-
- {(language === 'mermaid' && isSVG) - ? () - : (language === 'echarts' - ? (
) - : (language === 'svg' - ? () - : ( - {String(children).replace(/\n$/, '')} - )))} + const renderCodeContent = useMemo(() => { + const content = String(children).replace(/\n$/, '') + if (language === 'mermaid' && isSVG) { + return + } + else if (language === 'echarts') { + return ( +
+ + +
) - : ({children}) - }, [chartData, children, className, inline, isSVG, language, languageShowName, match, props]) + } + else if (language === 'svg' && isSVG) { + return ( + + + + ) + } + else { + return ( + + {content} + + ) + } + }, [language, match, props, children, chartData, isSVG]) + + if (inline || !match) + return {children} + + return ( +
+
+
{languageShowName}
+
+ {(['mermaid', 'svg']).includes(language!) && } + +
+
+ {renderCodeContent} +
+ ) }) CodeBlock.displayName = 'CodeBlock' diff --git a/web/app/components/base/svg-gallery/index.tsx b/web/app/components/base/svg-gallery/index.tsx index 81e8e87655..4368df00e9 100644 --- a/web/app/components/base/svg-gallery/index.tsx +++ b/web/app/components/base/svg-gallery/index.tsx @@ -29,7 +29,7 @@ export const SVGRenderer = ({ content }: { content: string }) => { if (svgRef.current) { try { svgRef.current.innerHTML = '' - const draw = SVG().addTo(svgRef.current).size('100%', '100%') + const draw = SVG().addTo(svgRef.current) const parser = new DOMParser() const svgDoc = parser.parseFromString(content, 'image/svg+xml') @@ -40,13 +40,11 @@ export const SVGRenderer = ({ content }: { content: string }) => { const originalWidth = parseInt(svgElement.getAttribute('width') || '400', 10) const originalHeight = parseInt(svgElement.getAttribute('height') || '600', 10) - const scale = Math.min(windowSize.width / originalWidth, windowSize.height / originalHeight, 1) - const scaledWidth = originalWidth * scale - const scaledHeight = originalHeight * scale - draw.size(scaledWidth, scaledHeight) + draw.viewbox(0, 0, originalWidth, originalHeight) + + svgRef.current.style.width = `${Math.min(originalWidth, 298)}px` const rootElement = draw.svg(content) - rootElement.scale(scale) rootElement.click(() => { setImagePreview(svgToDataURL(svgElement as Element)) @@ -54,7 +52,7 @@ export const SVGRenderer = ({ content }: { content: string }) => { } catch (error) { if (svgRef.current) - svgRef.current.innerHTML = 'Error rendering SVG. Wait for the image content to complete.' + svgRef.current.innerHTML = 'Error rendering SVG. Wait for the image content to complete.' } } }, [content, windowSize]) @@ -62,14 +60,14 @@ export const SVGRenderer = ({ content }: { content: string }) => { return ( <>
{imagePreview && ( setImagePreview('')} />)} diff --git a/web/app/components/tools/workflow-tool/configure-button.tsx b/web/app/components/tools/workflow-tool/configure-button.tsx index d2c5142f53..6521410dae 100644 --- a/web/app/components/tools/workflow-tool/configure-button.tsx +++ b/web/app/components/tools/workflow-tool/configure-button.tsx @@ -65,7 +65,7 @@ const WorkflowToolConfigureButton = ({ else { if (item.type === 'paragraph' && param.type !== 'string') return true - if (param.type !== item.type && !(param.type === 'string' && item.type === 'paragraph')) + if (item.type === 'text-input' && param.type !== 'string') return true } } diff --git a/web/app/components/workflow/nodes/http/node.tsx b/web/app/components/workflow/nodes/http/node.tsx index 5bbb10fc3a..4b7dbea257 100644 --- a/web/app/components/workflow/nodes/http/node.tsx +++ b/web/app/components/workflow/nodes/http/node.tsx @@ -15,7 +15,7 @@ const Node: FC> = ({
{method}
-
+
{ return (
{ supportCitationHitInfo: true, } as any} chatList={chatList} - chatContainerClassName='px-4' + chatContainerClassName='px-3' chatContainerInnerClassName='pt-6 w-full max-w-full mx-auto' chatFooterClassName='px-4 rounded-b-2xl' chatFooterInnerClassName='pb-4 w-full max-w-full mx-auto' @@ -129,6 +129,8 @@ const ChatRecord = () => { noChatInput allToolIcons={{}} showPromptLog + noSpacing + chatAnswerContainerInner='!pr-2' />
diff --git a/web/app/forgot-password/page.tsx b/web/app/forgot-password/page.tsx index fa44d1a20c..bb46011c06 100644 --- a/web/app/forgot-password/page.tsx +++ b/web/app/forgot-password/page.tsx @@ -28,7 +28,7 @@ const ForgotPassword = () => {
{token ? : }
- © {new Date().getFullYear()} Dify, Inc. All rights reserved. + © {new Date().getFullYear()} LangGenius, Inc. All rights reserved.
diff --git a/web/app/install/page.tsx b/web/app/install/page.tsx index 9fa38dd15e..395fae34ec 100644 --- a/web/app/install/page.tsx +++ b/web/app/install/page.tsx @@ -22,7 +22,7 @@ const Install = () => {
- © {new Date().getFullYear()} Dify, Inc. All rights reserved. + © {new Date().getFullYear()} LangGenius, Inc. All rights reserved.
diff --git a/web/i18n/es-ES/common.ts b/web/i18n/es-ES/common.ts index 2ba907361f..59a05f63d8 100644 --- a/web/i18n/es-ES/common.ts +++ b/web/i18n/es-ES/common.ts @@ -202,7 +202,7 @@ const translation = { invitationLink: 'Enlace de invitación', failedInvitationEmails: 'Los siguientes usuarios no fueron invitados exitosamente', ok: 'OK', - removeFromTeam: 'Eliminar del equipo', + removeFromTeam: 'Eliminar del espacio de trabajo', removeFromTeamTip: 'Se eliminará el acceso al equipo', setAdmin: 'Establecer como administrador', setMember: 'Establecer como miembro ordinario', diff --git a/web/i18n/zh-Hans/common.ts b/web/i18n/zh-Hans/common.ts index 3b0706280a..74c4727848 100644 --- a/web/i18n/zh-Hans/common.ts +++ b/web/i18n/zh-Hans/common.ts @@ -200,7 +200,7 @@ const translation = { invitationLink: '邀请链接', failedInvitationEmails: '邀请以下邮箱失败', ok: '好的', - removeFromTeam: '移除团队', + removeFromTeam: '移出团队', removeFromTeamTip: '将取消团队访问', setAdmin: '设为管理员', setMember: '设为普通成员', diff --git a/web/i18n/zh-Hant/common.ts b/web/i18n/zh-Hant/common.ts index c1f3ed2b2b..8cd51b1991 100644 --- a/web/i18n/zh-Hant/common.ts +++ b/web/i18n/zh-Hant/common.ts @@ -194,7 +194,7 @@ const translation = { invitationLink: '邀請連結', failedInvitationEmails: '邀請以下郵箱失敗', ok: '好的', - removeFromTeam: '移除團隊', + removeFromTeam: '移出團隊', removeFromTeamTip: '將取消團隊訪問', setAdmin: '設為管理員', setMember: '設為普通成員',