diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml
new file mode 100644
index 0000000000..5aee64b8e6
--- /dev/null
+++ b/.github/workflows/web-tests.yml
@@ -0,0 +1,46 @@
+name: Web Tests
+
+on:
+ pull_request:
+ branches:
+ - main
+ paths:
+ - web/**
+
+concurrency:
+ group: web-tests-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ test:
+ name: Web Tests
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ working-directory: ./web
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Check changed files
+ id: changed-files
+ uses: tj-actions/changed-files@v45
+ with:
+ files: web/**
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ if: steps.changed-files.outputs.any_changed == 'true'
+ with:
+ node-version: 20
+ cache: yarn
+ cache-dependency-path: ./web/package.json
+
+ - name: Install dependencies
+ if: steps.changed-files.outputs.any_changed == 'true'
+ run: yarn install --frozen-lockfile
+
+ - name: Run tests
+ if: steps.changed-files.outputs.any_changed == 'true'
+ run: yarn test
diff --git a/api/app.py b/api/app.py
index 91a49337fc..1b58beee15 100644
--- a/api/app.py
+++ b/api/app.py
@@ -53,11 +53,9 @@ from services.account_service import AccountService
warnings.simplefilter("ignore", ResourceWarning)
-# fix windows platform
-if os.name == "nt":
- os.system('tzutil /s "UTC"')
-else:
- os.environ["TZ"] = "UTC"
+os.environ["TZ"] = "UTC"
+# Windows does not support time.tzset()
+if hasattr(time, "tzset"):
time.tzset()
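A minimal sketch (not part of the patch) of why the hasattr guard works: setting the TZ environment variable alone does not affect a running CPython process on POSIX until time.tzset() re-reads it, and Windows builds of CPython do not expose time.tzset at all.

    import os
    import time

    os.environ["TZ"] = "UTC"
    if hasattr(time, "tzset"):  # absent on Windows builds of CPython
        time.tzset()            # re-read TZ so localtime()/strftime() use UTC
    print(time.strftime("%Z"))  # on POSIX this now prints "UTC"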
diff --git a/api/commands.py b/api/commands.py
index b8fc81af67..7ef4aed7f7 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -652,7 +652,7 @@ where sites.id is null limit 1000"""
app_was_created.send(app, account=account)
except Exception as e:
failed_app_ids.append(app_id)
- click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
+ click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
logging.exception(f"Fix app related site missing issue failed, error: {e}")
continue
diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py
index c6855ac854..62e79ec444 100644
--- a/api/core/app/apps/base_app_generate_response_converter.py
+++ b/api/core/app/apps/base_app_generate_response_converter.py
@@ -75,10 +75,10 @@ class AppGenerateResponseConverter(ABC):
:return:
"""
# show_retrieve_source
+ updated_resources = []
if "retriever_resources" in metadata:
- metadata["retriever_resources"] = []
for resource in metadata["retriever_resources"]:
- metadata["retriever_resources"].append(
+ updated_resources.append(
{
"segment_id": resource["segment_id"],
"position": resource["position"],
@@ -87,6 +87,7 @@ class AppGenerateResponseConverter(ABC):
"content": resource["content"],
}
)
+ metadata["retriever_resources"] = updated_resources
# show annotation reply
if "annotation_reply" in metadata:
diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
index 1b412b8639..203aca3384 100644
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -309,7 +309,7 @@ class AppRunner:
if not prompt_messages:
prompt_messages = result.prompt_messages
- if not usage and result.delta.usage:
+ if result.delta.usage:
usage = result.delta.usage
if not usage:
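A hedged sketch of the streaming behavior this hunk changes (the stream variable and chunk shape are illustrative): providers typically attach token usage to the final delta of a stream, so adopting the latest non-empty usage, rather than locking in the first one seen, avoids reporting a stale value.

    usage = None
    for result in stream:           # hypothetical iterator of result chunks
        if result.delta.usage:      # new guard: always adopt the latest usage
            usage = result.delta.usage
    # the old guard `if not usage and result.delta.usage` froze the first value seen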
diff --git a/api/core/embedding/cached_embedding.py b/api/core/embedding/cached_embedding.py
index 8ce12fd59f..75219051cd 100644
--- a/api/core/embedding/cached_embedding.py
+++ b/api/core/embedding/cached_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional, cast
import numpy as np
from sqlalchemy.exc import IntegrityError
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks]
- embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+ embedding_result = self._model_instance.invoke_text_embedding(
+ texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+ )
for vector in embedding_result.embeddings:
try:
@@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try:
- embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+ embedding_result = self._model_instance.invoke_text_embedding(
+ texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+ )
embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
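A usage sketch (model_instance is assumed to be an already-configured ModelInstance) of the asymmetric split these two hunks introduce: stored chunks are embedded as DOCUMENT and search text as QUERY, so models that distinguish passage and query tasks receive the right hint.

    from core.embedding.embedding_constant import EmbeddingInputType

    doc_result = model_instance.invoke_text_embedding(
        texts=["chunk to index"], user="user-1",
        input_type=EmbeddingInputType.DOCUMENT,
    )
    query_result = model_instance.invoke_text_embedding(
        texts=["what does the chunk say?"], user="user-1",
        input_type=EmbeddingInputType.QUERY,
    )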
diff --git a/api/core/embedding/embedding_constant.py b/api/core/embedding/embedding_constant.py
new file mode 100644
index 0000000000..9b4934646b
--- /dev/null
+++ b/api/core/embedding/embedding_constant.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class EmbeddingInputType(Enum):
+ """
+ Enum for embedding input type.
+ """
+
+ DOCUMENT = "document"
+ QUERY = "query"
diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py
index c40b6d1808..e5b6784516 100644
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@@ -65,7 +65,6 @@ SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response"
- "(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)
diff --git a/api/core/model_manager.py b/api/core/model_manager.py
index 990efd36c6..74b4452362 100644
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -3,6 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast
+from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
@@ -158,12 +159,15 @@ class ModelInstance:
tools=tools,
)
- def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+ def invoke_text_embedding(
+ self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+ ) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if not isinstance(self.model_type_instance, TextEmbeddingModel):
@@ -176,6 +180,7 @@ class ModelInstance:
credentials=self.credentials,
texts=texts,
user=user,
+ input_type=input_type,
)
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
diff --git a/api/core/model_runtime/model_providers/__base/text_embedding_model.py b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
index 54a4486023..a948dca20d 100644
--- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py
+++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
@@ -4,6 +4,7 @@ from typing import Optional
from pydantic import ConfigDict
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
@@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
model_config = ConfigDict(protected_namespaces=())
def invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
- Invoke large language model
+ Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
self.started_at = time.perf_counter()
try:
- return self._invoke(model, credentials, texts, user)
+ return self._invoke(model, credentials, texts, user, input_type)
except Exception as e:
raise self._transform_invoke_error(e)
@abstractmethod
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
- Invoke large language model
+ Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
raise NotImplementedError
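A minimal sketch of what a provider now implements against the widened signature (the class name and body are illustrative only; the module paths are taken from this diff):

    from typing import Optional

    from core.embedding.embedding_constant import EmbeddingInputType
    from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
    from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel

    class ExampleTextEmbeddingModel(TextEmbeddingModel):
        def _invoke(
            self,
            model: str,
            credentials: dict,
            texts: list[str],
            user: Optional[str] = None,
            input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
        ) -> TextEmbeddingResult:
            # a provider may ignore input_type or map it to a task parameter
            ...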
diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index 1f5f64019a..80db22ea84 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -38,3 +38,5 @@
- perfxcloud
- zhinao
- fireworks
+- mixedbread
+- nomic
diff --git a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
index d9cff8ecbb..8701a38050 100644
--- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
import tiktoken
from openai import AzureOpenAI
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
base_model_name = credentials["base_model_name"]
credentials_kwargs = self._to_credential_kwargs(credentials)
client = AzureOpenAI(**credentials_kwargs)
diff --git a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
index 779dfbb608..56b9be1c36 100644
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
index 251170d1ae..d9c5726592 100644
--- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
@@ -13,6 +13,7 @@ from botocore.exceptions import (
UnknownServiceError,
)
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
class BedrockTextEmbeddingModel(TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
client_config = Config(region_name=credentials["aws_region"])
diff --git a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
index a1c5e98118..4da2080690 100644
--- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ import cohere
import numpy as np
from cohere.core import RequestOptions
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
index f886fa23b5..cdb87a55e9 100644
--- a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
@@ -15,6 +15,7 @@ help:
en_US: https://fireworks.ai/account/api-keys
supported_model_types:
- llm
+ - text-embedding
configurate_methods:
- predefined-model
provider_credential_schema:
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
new file mode 100644
index 0000000000..31415a24fa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 11B Vision Instruct
+ en_US: Llama 3.2 11B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
new file mode 100644
index 0000000000..c2fd77d256
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-1b-instruct
+label:
+ zh_Hans: Llama 3.2 1B Instruct
+ en_US: Llama 3.2 1B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
new file mode 100644
index 0000000000..4b3c459c7b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-3b-instruct
+label:
+ zh_Hans: Llama 3.2 3B Instruct
+ en_US: Llama 3.2 3B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
new file mode 100644
index 0000000000..0aece7455d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 90B Vision Instruct
+ en_US: Llama 3.2 90B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
new file mode 100644
index 0000000000..d7c11691cf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
@@ -0,0 +1,12 @@
+model: WhereIsAI/UAE-Large-V1
+label:
+ zh_Hans: UAE-Large-V1
+ en_US: UAE-Large-V1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
new file mode 100644
index 0000000000..d09bafb4d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
@@ -0,0 +1,12 @@
+model: thenlper/gte-base
+label:
+ zh_Hans: GTE-base
+ en_US: GTE-base
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
new file mode 100644
index 0000000000..c41fa2f9d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
@@ -0,0 +1,12 @@
+model: thenlper/gte-large
+label:
+ zh_Hans: GTE-large
+ en_US: GTE-large
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
new file mode 100644
index 0000000000..c9098503d9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
@@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1.5
+label:
+ zh_Hans: nomic-embed-text-v1.5
+ en_US: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+ max_chunks: 16
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
new file mode 100644
index 0000000000..89078d3ff6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
@@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1
+label:
+ zh_Hans: nomic-embed-text-v1
+ en_US: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+ max_chunks: 16
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..cdce69ff38
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
@@ -0,0 +1,151 @@
+import time
+from collections.abc import Mapping
+from typing import Optional, Union
+
+import numpy as np
+from openai import OpenAI
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
+
+
+class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
+ """
+ Model class for Fireworks text embedding model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
+
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+ if user:
+ extra_model_kwargs["user"] = user
+
+ extra_model_kwargs["encoding_format"] = "float"
+
+ context_size = self._get_context_size(model, credentials)
+ max_chunks = self._get_max_chunks(model, credentials)
+
+ inputs = []
+ indices = []
+ used_tokens = 0
+
+ for i, text in enumerate(texts):
+ # Here token count is only an approximation based on the GPT2 tokenizer
+ # TODO: Optimize for better token estimation and chunking
+ num_tokens = self._get_num_tokens_by_gpt2(text)
+
+ if num_tokens >= context_size:
+ cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
+ # if num tokens is larger than context length, only use the start
+ inputs.append(text[0:cutoff])
+ else:
+ inputs.append(text)
+ indices += [i]
+
+ batched_embeddings = []
+ _iter = range(0, len(inputs), max_chunks)
+
+ for i in _iter:
+ embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+ model=model,
+ client=client,
+ texts=inputs[i : i + max_chunks],
+ extra_model_kwargs=extra_model_kwargs,
+ )
+ used_tokens += embedding_used_tokens
+ batched_embeddings += embeddings_batch
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
+ return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: Mapping) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ # call embedding model
+ self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ def _embedding_invoke(
+ self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
+ ) -> tuple[list[list[float]], int]:
+ """
+ Invoke embedding model
+ :param model: model name
+ :param client: model client
+ :param texts: texts to embed
+ :param extra_model_kwargs: extra model kwargs
+ :return: embeddings and used tokens
+ """
+ response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
+ return [data.embedding for data in response.data], response.usage.total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ input_price_info = self.get_price(
+ model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
+ )
+
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
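A worked example of the proportional truncation in _invoke above (numbers are illustrative, and the GPT-2 count is only an approximation): a 1,000-character text estimated at 640 tokens against a 512-token context keeps roughly the first 512/640 of its characters.

    import numpy as np

    text_len, num_tokens, context_size = 1000, 640, 512
    cutoff = int(np.floor(text_len * (context_size / num_tokens)))
    assert cutoff == 800  # only text[:800] is sent for embedding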
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
new file mode 100644
index 0000000000..d84e9937e0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-001
+label:
+ en_US: Gemini 1.5 Flash 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..2ff70564b2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
index bbc697e934..4e0209890a 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
new file mode 100644
index 0000000000..2aea8149f4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-8b-exp-0924
+label:
+ en_US: Gemini 1.5 Flash 8B 0924
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
index c5695e5dda..faabc5e4d1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
index 24b1c5af8a..a22fcca941 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
new file mode 100644
index 0000000000..dfd55c3a94
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash
+label:
+ en_US: Gemini 1.5 Flash
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
new file mode 100644
index 0000000000..a1feff171d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-001
+label:
+ en_US: Gemini 1.5 Pro 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..9ae07a06c5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
index 0a918e0d7b..97c68f7a18 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
index 7452ce46e7..860e4816a1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
index b3e1ecf3af..d1bf7d269d 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
new file mode 100644
index 0000000000..bdd70b34a2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro
+label:
+ en_US: Gemini 1.5 Pro
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
index 075e484e46..2d213d56ad 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@@ -27,6 +27,15 @@ parameter_rules:
default: 4096
min: 1
max: 4096
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
index 4e9f59e7da..e2f487c1ee 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@@ -31,6 +31,15 @@ parameter_rules:
max: 2048
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py
index 3fc6787a44..e686ad08d9 100644
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai
import requests
from google.api_core import exceptions
-from google.generativeai import client
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.client import _ClientManager
+from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part
from PIL import Image
@@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
history.append(content)
# Create a new ClientManager with tenant's API key
- new_client_manager = client._ClientManager()
+ new_client_manager = _ClientManager()
new_client_manager.configure(api_key=credentials["google_api_key"])
new_custom_client = new_client_manager.make_client("generative")
google_model._client = new_custom_client
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
response = google_model.generate_content(
contents=history,
generation_config=genai.types.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
request_options={"timeout": 600},
)
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
new file mode 100644
index 0000000000..019d453723
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-11b-text-preview
+label:
+ zh_Hans: Llama 3.2 11B Text (Preview)
+ en_US: Llama 3.2 11B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
new file mode 100644
index 0000000000..a44e4ff508
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-1b-preview
+label:
+ zh_Hans: Llama 3.2 1B Text (Preview)
+ en_US: Llama 3.2 1B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
new file mode 100644
index 0000000000..f2fdd0a05e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-3b-preview
+label:
+ zh_Hans: Llama 3.2 3B Text (Preview)
+ en_US: Llama 3.2 3B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
new file mode 100644
index 0000000000..3b34e7c079
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-90b-text-preview
+label:
+ zh_Hans: Llama 3.2 90B Text (Preview)
+ en_US: Llama 3.2 90B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
index 4ad96c4233..b2e6d1b652 100644
--- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ import numpy as np
import requests
from huggingface_hub import HfApi, InferenceClient
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
execute_model = model
diff --git a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
index 55f3c25804..b8ff3ca549 100644
--- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
@@ -1,6 +1,7 @@
import time
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
index 1396e59e18..75701ebc54 100644
--- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
@@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/jina/jina.yaml b/api/core/model_runtime/model_providers/jina/jina.yaml
index 23e18ad75f..970b22965b 100644
--- a/api/core/model_runtime/model_providers/jina/jina.yaml
+++ b/api/core/model_runtime/model_providers/jina/jina.yaml
@@ -1,6 +1,6 @@
provider: jina
label:
- en_US: Jina
+ en_US: Jina AI
description:
en_US: Embedding and Rerank Model Supported
icon_small:
@@ -11,7 +11,7 @@ background: "#EFFDFD"
help:
title:
en_US: Get your API key from Jina AI
- zh_Hans: 从 Jina 获取 API Key
+ zh_Hans: 从 Jina AI 获取 API Key
url:
en_US: https://jina.ai/
supported_model_types:
diff --git a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
index ceb79567d5..b397129512 100644
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -27,8 +28,37 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.jina.ai/v1"
+ def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
+ """
+ Build the request payload for the embeddings endpoint, applying model-specific input formats
+
+ :param model: model name
+ :param texts: texts to embed
+ :param credentials: model credentials
+ :param input_type: input type (query or document)
+ :return: request payload dict
+ """
+
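+ # jina-clip-v1 is multimodal, so plain strings must be wrapped as {"text": ...} objects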
+ def transform_jina_input_text(model, text):
+ if model == "jina-clip-v1":
+ return {"text": text}
+ return text
+
+ data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
+
+ # model specific parameters
+ if model == "jina-embeddings-v3":
+ # set `task` type according to input type for the best performance
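+ # jina-embeddings-v3 embeds queries and passages asymmetrically via task-specific adapters, so pick the matching task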
+ data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
+
+ return data
+
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
@@ -49,15 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
- def transform_jina_input_text(model, text):
- if model == "jina-clip-v1":
- return {"text": text}
- return text
-
- data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
-
- if model == "jina-embeddings-v3":
- data["task"] = "text-matching"
+ data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
try:
response = post(url, headers=headers, data=dumps(data))
diff --git a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
index 7d258be81e..ab8ca76c2f 100644
--- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from yarl import URL
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
class LocalAITextEmbeddingModel(TextEmbeddingModel):
"""
- Model class for Jina text embedding model.
+ Model class for LocalAI text embedding model.
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if len(texts) != 1:
diff --git a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
index 76fd1342bd..74d2a221d1 100644
--- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.minimax.chat/v1/embeddings"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["minimax_api_key"]
diff --git a/api/core/model_runtime/model_providers/mixedbread/__init__.py b/api/core/model_runtime/model_providers/mixedbread/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png
new file mode 100644
index 0000000000..2027611bd5
Binary files /dev/null and b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_l_en.png differ
diff --git a/api/core/model_runtime/model_providers/mixedbread/_assets/icon_s_en.png b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_s_en.png
new file mode 100644
index 0000000000..5c357bddbd
Binary files /dev/null and b/api/core/model_runtime/model_providers/mixedbread/_assets/icon_s_en.png differ
diff --git a/api/core/model_runtime/model_providers/mixedbread/mixedbread.py b/api/core/model_runtime/model_providers/mixedbread/mixedbread.py
new file mode 100644
index 0000000000..3c78150e6f
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/mixedbread.py
@@ -0,0 +1,27 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class MixedBreadProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
+
+ # Use the `mxbai-embed-large-v1` model for validation
+ model_instance.validate_credentials(model="mxbai-embed-large-v1", credentials=credentials)
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f"{self.get_provider_schema().provider} credentials validation failed")
+ raise ex
diff --git a/api/core/model_runtime/model_providers/mixedbread/mixedbread.yaml b/api/core/model_runtime/model_providers/mixedbread/mixedbread.yaml
new file mode 100644
index 0000000000..2f43aea6ad
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/mixedbread.yaml
@@ -0,0 +1,31 @@
+provider: mixedbread
+label:
+ en_US: MixedBread
+description:
+ en_US: Embedding and Rerank Model Supported
+icon_small:
+ en_US: icon_s_en.png
+icon_large:
+ en_US: icon_l_en.png
+background: "#EFFDFD"
+help:
+ title:
+ en_US: Get your API key from MixedBread AI
+ zh_Hans: 从 MixedBread AI 获取 API Key
+ url:
+ en_US: https://www.mixedbread.ai/
+supported_model_types:
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/mixedbread/rerank/__init__.py b/api/core/model_runtime/model_providers/mixedbread/rerank/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/mixedbread/rerank/mxbai-rerank-large-v1-en.yaml b/api/core/model_runtime/model_providers/mixedbread/rerank/mxbai-rerank-large-v1-en.yaml
new file mode 100644
index 0000000000..beda219953
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/rerank/mxbai-rerank-large-v1-en.yaml
@@ -0,0 +1,4 @@
+model: mxbai-rerank-large-v1
+model_type: rerank
+model_properties:
+ context_size: 512
diff --git a/api/core/model_runtime/model_providers/mixedbread/rerank/rerank.py b/api/core/model_runtime/model_providers/mixedbread/rerank/rerank.py
new file mode 100644
index 0000000000..bf3c12fd86
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/rerank/rerank.py
@@ -0,0 +1,125 @@
+from typing import Optional
+
+import httpx
+
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
+from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.rerank_model import RerankModel
+
+
+class MixedBreadRerankModel(RerankModel):
+ """
+ Model class for MixedBread rerank model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ query: str,
+ docs: list[str],
+ score_threshold: Optional[float] = None,
+ top_n: Optional[int] = None,
+ user: Optional[str] = None,
+ ) -> RerankResult:
+ """
+ Invoke rerank model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param query: search query
+ :param docs: docs for reranking
+ :param score_threshold: score threshold
+ :param top_n: top n documents to return
+ :param user: unique user id
+ :return: rerank result
+ """
+ if len(docs) == 0:
+ return RerankResult(model=model, docs=[])
+
+ base_url = credentials.get("base_url", "https://api.mixedbread.ai/v1")
+ base_url = base_url.removesuffix("/")
+
+ try:
+ response = httpx.post(
+ base_url + "/reranking",
+ json={"model": model, "query": query, "input": docs, "top_k": top_n, "return_input": True},
+ headers={"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"},
+ )
+ response.raise_for_status()
+ results = response.json()
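+ # return_input=True makes the API echo each document back, so result["input"] below carries the original text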
+
+ rerank_documents = []
+ for result in results["data"]:
+ rerank_document = RerankDocument(
+ index=result["index"],
+ text=result["input"],
+ score=result["score"],
+ )
+ if score_threshold is None or result["score"] >= score_threshold:
+ rerank_documents.append(rerank_document)
+
+ return RerankResult(model=model, docs=rerank_documents)
+ except httpx.HTTPStatusError as e:
+ raise InvokeServerUnavailableError(str(e))
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(
+ model=model,
+ credentials=credentials,
+ query="What is the capital of the United States?",
+ docs=[
+ "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
+ "Census, Carson City had a population of 55,274.",
+ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
+ "are a political division controlled by the United States. Its capital is Saipan.",
+ ],
+ score_threshold=0.8,
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ """
+ return {
+ InvokeConnectionError: [httpx.ConnectError],
+ InvokeServerUnavailableError: [httpx.RemoteProtocolError],
+ InvokeRateLimitError: [],
+ InvokeAuthorizationError: [httpx.HTTPStatusError],
+ InvokeBadRequestError: [httpx.RequestError],
+ }
+
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
+ """
+ generate custom model entities from credentials
+ """
+ entity = AIModelEntity(
+ model=model,
+ label=I18nObject(en_US=model),
+ model_type=ModelType.RERANK,
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
+ )
+
+ return entity
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/__init__.py b/api/core/model_runtime/model_providers/mixedbread/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-2d-large-v1-en.yaml b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-2d-large-v1-en.yaml
new file mode 100644
index 0000000000..0c3c863d06
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-2d-large-v1-en.yaml
@@ -0,0 +1,8 @@
+model: mxbai-embed-2d-large-v1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+pricing:
+ input: '0.0001'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-large-v1-en.yaml b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-large-v1-en.yaml
new file mode 100644
index 0000000000..0c5cda2a72
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/mxbai-embed-large-v1-en.yaml
@@ -0,0 +1,8 @@
+model: mxbai-embed-large-v1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+pricing:
+ input: '0.0001'
+ unit: '0.001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..68b7b448bf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
@@ -0,0 +1,170 @@
+import time
+from json import JSONDecodeError, dumps
+from typing import Optional
+
+import requests
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.common_entities import I18nObject
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+
+
+class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
+ """
+ Model class for MixedBread text embedding model.
+ """
+
+ api_base: str = "https://api.mixedbread.ai/v1"
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
+ api_key = credentials.get("api_key")
+ if not api_key:
+ raise CredentialsValidateFailedError("api_key is required")
+
+ base_url = credentials.get("base_url", self.api_base)
+ base_url = base_url.removesuffix("/")
+
+ url = base_url + "/embeddings"
+ headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
+
+ data = {"model": model, "input": texts}
+
+ try:
+ response = requests.post(url, headers=headers, data=dumps(data))
+ except Exception as e:
+ raise InvokeConnectionError(str(e))
+
+ if response.status_code != 200:
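+ # map HTTP error codes onto the unified Invoke* error hierarchy; the error body carries a "detail" field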
+ try:
+ resp = response.json()
+ msg = resp["detail"]
+ if response.status_code == 401:
+ raise InvokeAuthorizationError(msg)
+ elif response.status_code == 429:
+ raise InvokeRateLimitError(msg)
+ elif response.status_code == 500:
+ raise InvokeServerUnavailableError(msg)
+ else:
+ raise InvokeBadRequestError(msg)
+ except JSONDecodeError as e:
+ raise InvokeServerUnavailableError(
+ f"Failed to convert response to json: {e} with text: {response.text}"
+ )
+
+ try:
+ resp = response.json()
+ embeddings = resp["data"]
+ usage = resp["usage"]
+ except Exception as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage["total_tokens"])
+
+ result = TextEmbeddingResult(
+ model=model, embeddings=[[float(data) for data in x["embedding"]] for x in embeddings], usage=usage
+ )
+
+ return result
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(model=model, credentials=credentials, texts=["ping"])
+ except Exception as e:
+ raise CredentialsValidateFailedError(f"Credentials validation failed: {e}")
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ return {
+ InvokeConnectionError: [InvokeConnectionError],
+ InvokeServerUnavailableError: [InvokeServerUnavailableError],
+ InvokeRateLimitError: [InvokeRateLimitError],
+ InvokeAuthorizationError: [InvokeAuthorizationError],
+ InvokeBadRequestError: [KeyError, InvokeBadRequestError],
+ }
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model, credentials=credentials, price_type=PriceType.INPUT, tokens=tokens
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
+
+ def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
+ """
+ generate custom model entities from credentials
+ """
+ entity = AIModelEntity(
+ model=model,
+ label=I18nObject(en_US=model),
+ model_type=ModelType.TEXT_EMBEDDING,
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "512"))},
+ )
+
+ return entity
diff --git a/api/core/model_runtime/model_providers/nomic/__init__.py b/api/core/model_runtime/model_providers/nomic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg
new file mode 100644
index 0000000000..6c4a1058ab
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/_assets/icon_l_en.svg
@@ -0,0 +1,13 @@
+
diff --git a/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png
new file mode 100644
index 0000000000..3eba3b82bc
Binary files /dev/null and b/api/core/model_runtime/model_providers/nomic/_assets/icon_s_en.png differ
diff --git a/api/core/model_runtime/model_providers/nomic/_common.py b/api/core/model_runtime/model_providers/nomic/_common.py
new file mode 100644
index 0000000000..406577dcd7
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/_common.py
@@ -0,0 +1,28 @@
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+
+
+class _CommonNomic:
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ The key is the error type thrown to the caller
+ The value is the error type thrown by the model,
+ which needs to be converted into a unified error type for the caller.
+
+ :return: Invoke error mapping
+ """
+ return {
+ InvokeConnectionError: [InvokeConnectionError],
+ InvokeServerUnavailableError: [InvokeServerUnavailableError],
+ InvokeRateLimitError: [InvokeRateLimitError],
+ InvokeAuthorizationError: [InvokeAuthorizationError],
+ InvokeBadRequestError: [KeyError, InvokeBadRequestError],
+ }
diff --git a/api/core/model_runtime/model_providers/nomic/nomic.py b/api/core/model_runtime/model_providers/nomic/nomic.py
new file mode 100644
index 0000000000..d4e5da2e98
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/nomic.py
@@ -0,0 +1,26 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class NomicAtlasProvider(ModelProvider):
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.TEXT_EMBEDDING)
+ model_instance.validate_credentials(model="nomic-embed-text-v1.5", credentials=credentials)
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f"{self.get_provider_schema().provider} credentials validation failed")
+ raise ex
diff --git a/api/core/model_runtime/model_providers/nomic/nomic.yaml b/api/core/model_runtime/model_providers/nomic/nomic.yaml
new file mode 100644
index 0000000000..60dcf1facb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/nomic.yaml
@@ -0,0 +1,29 @@
+provider: nomic
+label:
+ zh_Hans: Nomic Atlas
+ en_US: Nomic Atlas
+icon_small:
+ en_US: icon_s_en.png
+icon_large:
+ en_US: icon_l_en.svg
+background: "#EFF1FE"
+help:
+ title:
+ en_US: Get your API key from Nomic Atlas
+ zh_Hans: 从 Nomic Atlas 获取 API Key
+ url:
+ en_US: https://atlas.nomic.ai/data
+supported_model_types:
+ - text-embedding
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: nomic_api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nomic/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml
new file mode 100644
index 0000000000..111452df57
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.5.yaml
@@ -0,0 +1,8 @@
+model: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+pricing:
+ input: "0.1"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml
new file mode 100644
index 0000000000..ac59f106ed
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/nomic-embed-text-v1.yaml
@@ -0,0 +1,8 @@
+model: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+pricing:
+ input: "0.1"
+ unit: "0.000001"
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..857dfb5f41
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@@ -0,0 +1,165 @@
+import time
+from functools import wraps
+from typing import Optional
+
+from nomic import embed
+from nomic import login as nomic_login
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import (
+ EmbeddingUsage,
+ TextEmbeddingResult,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import (
+ TextEmbeddingModel,
+)
+from core.model_runtime.model_providers.nomic._common import _CommonNomic
+
+
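+# decorator that authenticates with the Nomic API (via nomic.login) before the wrapped call runs, failing fast on bad credentials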
+def nomic_login_required(func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ try:
+ if not kwargs.get("credentials"):
+ raise ValueError("missing credentials parameters")
+ credentials = kwargs.get("credentials")
+ if "nomic_api_key" not in credentials:
+ raise ValueError("missing nomic_api_key in credentials parameters")
+ # nomic login
+ nomic_login(credentials["nomic_api_key"])
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+ return func(*args, **kwargs)
+
+ return wrapper
+
+
+class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
+ """
+ Model class for nomic text embedding model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
+ embeddings, prompt_tokens, total_tokens = self.embed_text(
+ model=model,
+ credentials=credentials,
+ texts=texts,
+ )
+
+ # calc usage
+ usage = self._calc_response_usage(
+ model=model, credentials=credentials, tokens=prompt_tokens, total_tokens=total_tokens
+ )
+ return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # call embedding model
+ self.embed_text(model=model, credentials=credentials, texts=["ping"])
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @nomic_login_required
+ def embed_text(self, model: str, credentials: dict, texts: list[str]) -> tuple[list[list[float]], int, int]:
+ """Call out to Nomic's embedding endpoint.
+
+ Args:
+ model: The model to use for embedding.
+ texts: The list of texts to embed.
+
+ Returns:
+ List of embeddings, one for each text, and tokens usage.
+ """
+ embeddings: list[list[float]] = []
+ prompt_tokens = 0
+ total_tokens = 0
+
+ response = embed.text(
+ model=model,
+ texts=texts,
+ )
+
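+ # validate the response shape before extracting embeddings and token usage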
+ if not (response and "embeddings" in response):
+ raise ValueError("Embedding data is missing in the response.")
+
+ if not (response and "usage" in response):
+ raise ValueError("Response usage is missing.")
+
+ if "prompt_tokens" not in response["usage"]:
+ raise ValueError("Response usage does not contain prompt tokens.")
+
+ if "total_tokens" not in response["usage"]:
+ raise ValueError("Response usage does not contain total tokens.")
+
+ embeddings = [list(map(float, e)) for e in response["embeddings"]]
+ total_tokens = response["usage"]["total_tokens"]
+ prompt_tokens = response["usage"]["prompt_tokens"]
+ return embeddings, prompt_tokens, total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int, total_tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: prompt tokens
+ :param total_tokens: total tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ price_type=PriceType.INPUT,
+ tokens=tokens,
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=total_tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
index 00cec265d5..936ceb8dd2 100644
--- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
models: list[str] = ["NV-Embed-QA"]
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
index 80ad2be9f5..4de9296cca 100644
--- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ from typing import Optional
import numpy as np
import oci
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index ff732e6925..a7ea53e0e9 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
if chunk_json["done"]:
# calculate num tokens
- if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
- # transform usage
+ if "prompt_eval_count" in chunk_json:
prompt_tokens = chunk_json["prompt_eval_count"]
- completion_tokens = chunk_json["eval_count"]
else:
- # calculate num tokens
- prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
- completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+ prompt_message_content = prompt_messages[0].content
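+ # content may be a plain string or a list of typed parts (e.g. text and image) for multimodal prompts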
+ if isinstance(prompt_message_content, str):
+ prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+ else:
+ content_text = ""
+ for message_content in prompt_message_content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ content_text += message_content.data
+ prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
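+ # fall back to a GPT-2 tokenizer estimate when Ollama omits eval_count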
+ completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
diff --git a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
index b4c61d8a6d..5cf3f1c6fa 100644
--- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
@@ -8,6 +8,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
index 535d8388bc..16f1a0cfa1 100644
--- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ import numpy as np
import tiktoken
from openai import OpenAI
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# transform credentials to kwargs for model instance
diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
index e83cfdf873..64fa6aaa3c 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
index 00e583cc79..c5d4330912 100644
--- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
index b62a2d2aaf..1e86f351c8 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
index 71b6fb99c4..9f724a77ac 100644
--- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
if "model_version" in credentials:
diff --git a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
index 2edd13d56d..97b7692044 100644
--- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
+++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
@@ -84,9 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
Model class for Cohere large language model.
"""
- sagemaker_client: Any = None
- sagemaker_sess: Any = None
+ sagemaker_session: Any = None
predictor: Any = None
+ sagemaker_endpoint: Optional[str] = None
def _handle_chat_generate_response(
self,
@@ -212,27 +212,29 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
:param user: unique user id
:return: full response or stream response chunk generator result
"""
- if not self.sagemaker_client:
- access_key = credentials.get("access_key")
- secret_key = credentials.get("secret_key")
+ if not self.sagemaker_session:
+ access_key = credentials.get("aws_access_key_id")
+ secret_key = credentials.get("aws_secret_access_key")
aws_region = credentials.get("aws_region")
+ boto_session = None
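+ # build a boto3 Session from explicit keys when provided; otherwise fall back to the default credential chain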
if aws_region:
if access_key and secret_key:
- self.sagemaker_client = boto3.client(
- "sagemaker-runtime",
- aws_access_key_id=access_key,
- aws_secret_access_key=secret_key,
- region_name=aws_region,
+ boto_session = boto3.Session(
+ aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=aws_region
)
else:
- self.sagemaker_client = boto3.client("sagemaker-runtime", region_name=aws_region)
+ boto_session = boto3.Session(region_name=aws_region)
else:
- self.sagemaker_client = boto3.client("sagemaker-runtime")
+ boto_session = boto3.Session()
- sagemaker_session = Session(sagemaker_runtime_client=self.sagemaker_client)
+ sagemaker_client = boto_session.client("sagemaker")
+ self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
+
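+ # cache the Predictor per endpoint and rebuild it only when the configured endpoint changes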
+ if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
+ self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
self.predictor = Predictor(
- endpoint_name=credentials.get("sagemaker_endpoint"),
- sagemaker_session=sagemaker_session,
+ endpoint_name=self.sagemaker_endpoint,
+ sagemaker_session=self.sagemaker_session,
serializer=serializers.JSONSerializer(),
)
diff --git a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
index d55144f8a7..8f993ce672 100644
--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ from typing import Any, Optional
import boto3
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
return embeddings
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index 43db4aed11..a3e5d0981f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,25 +1,38 @@
-- Qwen/Qwen2.5-7B-Instruct
-- Qwen/Qwen2.5-14B-Instruct
-- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-72B-Instruct
+- Qwen/Qwen2.5-Math-72B-Instruct
+- Qwen/Qwen2.5-32B-Instruct
+- Qwen/Qwen2.5-14B-Instruct
+- Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-Coder-7B-Instruct
+- deepseek-ai/DeepSeek-V2.5
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
-- 01-ai/Yi-1.5-34B-Chat
-- 01-ai/Yi-1.5-9B-Chat-16K
-- 01-ai/Yi-1.5-6B-Chat
-- THUDM/glm-4-9b-chat
-- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- THUDM/glm-4-9b-chat
+- THUDM/chatglm3-6b
+- 01-ai/Yi-1.5-34B-Chat-16K
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- internlm/internlm2_5-20b-chat
- internlm/internlm2_5-7b-chat
-- google/gemma-2-27b-it
-- google/gemma-2-9b-it
-- meta-llama/Meta-Llama-3-70B-Instruct
-- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
-- mistralai/Mixtral-8x7B-Instruct-v0.1
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2
+- Pro/Qwen/Qwen2-7B-Instruct
+- Pro/Qwen/Qwen2-1.5B-Instruct
+- Pro/THUDM/glm-4-9b-chat
+- Pro/THUDM/chatglm3-6b
+- Pro/01-ai/Yi-1.5-9B-Chat-16K
+- Pro/01-ai/Yi-1.5-6B-Chat
+- Pro/internlm/internlm2_5-7b-chat
+- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
+- Pro/meta-llama/Meta-Llama-3-8B-Instruct
+- Pro/google/gemma-2-9b-it
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
index 27664eab6c..89fb153ba0 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -28,3 +28,4 @@ pricing:
output: '0'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
index fd7aada428..2785e7496f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -28,3 +28,4 @@ pricing:
output: '1.26'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
index 6cdf4933b4..c5dcc12610 100644
--- a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@@ -1,5 +1,6 @@
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel,
@@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
super().validate_credentials(model, credentials)
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
self._add_custom_parameters(credentials)
-        return super()._invoke(model, credentials, texts, user)
+        return super()._invoke(model, credentials, texts, user, input_type)
diff --git a/api/core/model_runtime/model_providers/spark/llm/_client.py b/api/core/model_runtime/model_providers/spark/llm/_client.py
index b99a657e71..48911f657a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/_client.py
+++ b/api/core/model_runtime/model_providers/spark/llm/_client.py
@@ -25,6 +25,7 @@ class SparkLLMClient:
"spark-pro": {"version": "v3.1", "chat_domain": "generalv3"},
"spark-pro-128k": {"version": "pro-128k", "chat_domain": "pro-128k"},
"spark-max": {"version": "v3.5", "chat_domain": "generalv3.5"},
+ "spark-max-32k": {"version": "max-32k", "chat_domain": "max-32k"},
"spark-4.0-ultra": {"version": "v4.0", "chat_domain": "4.0Ultra"},
}
@@ -32,7 +33,7 @@ class SparkLLMClient:
self.chat_domain = model_api_configs[model]["chat_domain"]
- if model == "spark-pro-128k":
+ if model in ["spark-pro-128k", "spark-max-32k"]:
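+ # pro-128k and max-32k use a /{endpoint}/{version} path, the reverse of the other models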
self.api_base = f"wss://{domain}/{endpoint}/{api_version}"
else:
self.api_base = f"wss://{domain}/{api_version}/{endpoint}"
diff --git a/api/core/model_runtime/model_providers/spark/llm/_position.yaml b/api/core/model_runtime/model_providers/spark/llm/_position.yaml
index 458397f2aa..73f39cb119 100644
--- a/api/core/model_runtime/model_providers/spark/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/spark/llm/_position.yaml
@@ -1,3 +1,4 @@
+- spark-max-32k
- spark-4.0-ultra
- spark-max
- spark-pro-128k
diff --git a/api/core/model_runtime/model_providers/spark/llm/llm.py b/api/core/model_runtime/model_providers/spark/llm/llm.py
index 57193dc031..1181ba699a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
:param prompt_messages: prompt messages
:return: llm response chunk generator result
"""
+ completion = ""
for index, content in enumerate(client.subscribe()):
if isinstance(content, dict):
delta = content["data"]
else:
delta = content
-
+ completion += delta
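+ # accumulate the full completion so token usage is counted over everything generated so far, not just the latest delta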
assistant_prompt_message = AssistantPromptMessage(
content=delta or "",
)
-
+ temp_assistant_prompt_message = AssistantPromptMessage(
+ content=completion,
+ )
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
- completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+ completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml
new file mode 100644
index 0000000000..1a1ab6844c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/spark/llm/spark-max-32k.yaml
@@ -0,0 +1,33 @@
+model: spark-max-32k
+label:
+ en_US: Spark Max-32K
+model_type: llm
+model_properties:
+ mode: chat
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.5
+ help:
+ zh_Hans: 核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。
+ en_US: Nucleus sampling threshold. Controls the randomness of the output: the higher the value, the more random the results, and the more likely the same question yields different answers.
+ - name: max_tokens
+ use_template: max_tokens
+ default: 4096
+ min: 1
+ max: 8192
+ help:
+ zh_Hans: 模型回答的tokens的最大长度。
+ en_US: Maximum length of tokens for the model response.
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ default: 4
+ min: 1
+ max: 6
+ help:
+ zh_Hans: 从 k 个候选中随机选择一个(非等概率)。
+ en_US: Randomly select one from k candidates (non-equal probability).
+ required: false
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
index aad07f5673..34a57d1fc0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: farui-plus
label:
en_US: farui-plus
@@ -62,16 +63,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index f90c7f075f..3e3585b30a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -18,7 +18,7 @@ from dashscope.common.error import (
UnsupportedModel,
)
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
@@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
FetchFrom,
I18nObject,
ModelFeature,
+ ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
@@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling
:return:
"""
+ # Check if the model was added via get_customizable_model_schema
+ if self.get_customizable_model_schema(model, credentials) is not None:
+ # For custom models, tokens are not calculated.
+ return 0
+
if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
model = model.replace("-chat", "")
if model == "farui-plus":
@@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param credentials: model credentials
:return: AIModelEntity or None
"""
- rules = [
- ParameterRule(
- name="temperature",
- type=ParameterType.FLOAT,
- use_template="temperature",
- label=I18nObject(zh_Hans="温度", en_US="Temperature"),
- ),
- ParameterRule(
- name="top_p",
- type=ParameterType.FLOAT,
- use_template="top_p",
- label=I18nObject(zh_Hans="Top P", en_US="Top P"),
- ),
- ParameterRule(
- name="top_k",
- type=ParameterType.INT,
- min=0,
- max=99,
- label=I18nObject(zh_Hans="top_k", en_US="top_k"),
- ),
- ParameterRule(
- name="max_tokens",
- type=ParameterType.INT,
- min=1,
- max=128000,
- default=1024,
- label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
- ),
- ParameterRule(
- name="seed",
- type=ParameterType.INT,
- default=1234,
- label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
- ),
- ParameterRule(
- name="repetition_penalty",
- type=ParameterType.FLOAT,
- default=1.1,
- label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
- ),
- ]
-
- entity = AIModelEntity(
+ return AIModelEntity(
model=model,
- label=I18nObject(en_US=model),
- fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ label=I18nObject(en_US=model, zh_Hans=model),
model_type=ModelType.LLM,
- model_properties={},
- parameter_rules=rules,
+ features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
+ if credentials.get("function_calling_type") == "tool_call"
+ else [],
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={
+ ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
+ ModelPropertyKey.MODE: LLMMode.CHAT.value,
+ },
+ parameter_rules=[
+ ParameterRule(
+ name="temperature",
+ use_template="temperature",
+ label=I18nObject(en_US="Temperature", zh_Hans="温度"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="max_tokens",
+ use_template="max_tokens",
+ default=512,
+ min=1,
+ max=int(credentials.get("max_tokens", 1024)),
+ label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="top_p",
+ use_template="top_p",
+ label=I18nObject(en_US="Top P", zh_Hans="Top P"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="top_k",
+ use_template="top_k",
+ label=I18nObject(en_US="Top K", zh_Hans="Top K"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="frequency_penalty",
+ use_template="frequency_penalty",
+ label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
+ type=ParameterType.FLOAT,
+ ),
+ ],
)
-
- return entity
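A short sketch of how the credential fields consumed above could shape a custom model's schema; it mirrors the diff's logic with plain dicts, so everything beyond the keys context_size, max_tokens, and function_calling_type is illustrative:

    # derive schema fragments from user-supplied credentials,
    # defaulting as the diff does (context_size 8000, max_tokens 1024)
    def schema_hints(credentials: dict) -> dict:
        features = (
            ["tool-call", "multi-tool-call", "stream-tool-call"]
            if credentials.get("function_calling_type") == "tool_call"
            else []
        )
        return {
            "context_size": int(credentials.get("context_size", 8000)),
            "max_tokens_cap": int(credentials.get("max_tokens", 1024)),
            "features": features,
        }

    print(schema_hints({"function_calling_type": "tool_call", "context_size": "32000"}))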
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
index ebba565d57..64a3f33133 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-0919
label:
en_US: qwen-coder-turbo-0919
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
index 361e2c2373..a4c93f7047 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-latest
label:
en_US: qwen-coder-turbo-latest
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
index f4032a4dd3..ff68faed80 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo
label:
en_US: qwen-coder-turbo
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
index dbe7d024a5..c3dbb3616f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@@ -1,4 +1,4 @@
-# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-long
label:
en_US: qwen-long
@@ -63,16 +63,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
index 89d1302abe..42fe1f6862 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0816
label:
en_US: qwen-math-plus-0816
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
index 032b3c970d..9b6567b8cd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0919
label:
en_US: qwen-math-plus-0919
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
index 31dd9f6972..b2a2393b36 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-latest
label:
en_US: qwen-math-plus-latest
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
index 1a51d57f78..63f4b7ff0a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus
label:
en_US: qwen-math-plus
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
index 1894eea417..4da90eec3e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-0919
label:
en_US: qwen-math-turbo-0919
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
index b8365618b0..d29f8851dd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-latest
label:
en_US: qwen-math-turbo-latest
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
index 8d346d691e..2a8f7f725e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo
label:
en_US: qwen-math-turbo
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
index c0ad12b85e..ef1841b517 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0107
label:
en_US: qwen-max-0107
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
index b00fb44d29..a2ea5df130 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0403, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0403
label:
en_US: qwen-max-0403
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
index 1848dcc07d..a467665f11 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0428, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0428
label:
en_US: qwen-max-0428
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
index 238882bb12..78661eaea0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0919
label:
en_US: qwen-max-0919
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
index dc234783cd..6f4674576b 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-1201
label:
en_US: qwen-max-1201
@@ -66,12 +68,6 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
index 9d7d3c2fcb..8b5f005473 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-latest
label:
en_US: qwen-max-latest
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
index a7bdc42f73..098494ff95 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-longcontext
label:
en_US: qwen-max-longcontext
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
index 57888406af..9d0d3f8db3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max
label:
en_US: qwen-max
@@ -62,6 +64,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +72,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
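A hedged usage sketch: with DashScope's Python SDK, enable_search is passed through as a generation parameter; the call shape below assumes the common Generation.call interface and a configured API key:

    import dashscope

    # toggle the built-in web search; per the help text above, the model
    # still decides internally whether to use the retrieved results
    response = dashscope.Generation.call(
        model="qwen-max",
        messages=[{"role": "user", "content": "What changed in Qwen recently?"}],
        enable_search=True,
        result_format="message",
    )
    print(response)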
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
index 1e0b816617..0b1a6f81df 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0206
label:
en_US: qwen-plus-0206
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
index f70c373922..7706005bb5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0624
label:
en_US: qwen-plus-0624
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
index c6007e9164..348276fc08 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0723, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0723
label:
en_US: qwen-plus-0723
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
index 2f53c43336..29f125135e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0806, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0806
label:
en_US: qwen-plus-0806
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
index 90b54ca52e..905fa1e102 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0919
label:
en_US: qwen-plus-0919
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
index 59e8851240..c7a3549727 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-chat
label:
en_US: qwen-plus-chat
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
index 2a821dbcfe..608f52c296 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-latest
label:
en_US: qwen-plus-latest
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
index 626884f4b2..9089e57255 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus
label:
en_US: qwen-plus
@@ -62,6 +64,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +72,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
index 844fced77a..7ee0d44f2f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@@ -1,3 +1,6 @@
+# this model corresponds to qwen-turbo-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
+
model: qwen-turbo-0206
label:
en_US: qwen-turbo-0206
@@ -60,16 +63,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
index 0152f75579..20a3f7eb64 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0624
label:
en_US: qwen-turbo-0624
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
index 19c6c8d293..ba73dec363 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0919
label:
en_US: qwen-turbo-0919
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
index f557f311ef..d785b7fe85 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-chat
label:
en_US: qwen-turbo-chat
@@ -62,16 +64,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
index be2475847e..fe38a4283c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-latest
label:
en_US: qwen-turbo-latest
@@ -60,16 +62,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
index 90f13dc19f..215c9ec5fc 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo
label:
en_US: qwen-turbo
@@ -62,6 +64,7 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
@@ -69,6 +72,9 @@ parameter_rules:
- name: enable_search
type: boolean
default: false
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
help:
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
index 63b6074d0d..d80168ffc3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0201
label:
en_US: qwen-vl-max-0201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
index 41d45966e9..50e10226a5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0809
label:
en_US: qwen-vl-max-0809
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 32000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse output, while a lower temperature sharpens the distribution, making high-probability words more likely and the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
index 78d0509374..21b127f56c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max
label:
en_US: qwen-vl-max
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 32000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
index 8944388b1e..03cb039d15 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0201
label:
en_US: qwen-vl-plus-0201
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 8000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.02'
output: '0.02'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
index 869e0ea71c..67b2b2ebdd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0809
label:
en_US: qwen-vl-plus-0809
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 32768
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.008'
output: '0.008'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
index da11bacc64..f55764c6c0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus
label:
en_US: qwen-vl-plus
@@ -9,6 +10,15 @@ model_properties:
mode: chat
context_size: 8000
parameter_rules:
+ - name: temperature
+ use_template: temperature
+ type: float
+ default: 0.3
+ min: 0.0
+ max: 2.0
+ help:
+ zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+ en_US: Controls the degree of randomness and diversity. Specifically, the temperature value controls how strongly the probability distribution over candidate tokens is smoothed during generation. A higher temperature flattens the distribution, letting more low-probability tokens through and making the output more diverse; a lower temperature sharpens the distribution, favoring high-probability tokens and making the output more deterministic.
- name: top_p
use_template: top_p
type: float
@@ -50,6 +60,16 @@ parameter_rules:
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
- name: response_format
use_template: response_format
+ - name: repetition_penalty
+ required: false
+ type: float
+ default: 1.1
+ label:
+ zh_Hans: 重复惩罚
+ en_US: Repetition penalty
+ help:
+ zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+ en_US: Controls the degree of repetition in generated text. Increasing repetition_penalty reduces repetition in the model's output; 1.0 means no penalty.
pricing:
input: '0.008'
output: '0.008'
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
index cfe4b5a666..ea157f42de 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-1.5b-instruct
label:
en_US: qwen2-math-1.5b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
index e541c197b0..37052a9233 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-72b-instruct
label:
en_US: qwen2-math-72b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
index ba4514e3d6..e182f1c27f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-7b-instruct
label:
en_US: qwen2-math-7b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
index e5596041af..9e75ccc1f2 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-0.5b-instruct
label:
en_US: qwen2.5-0.5b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
index 4004c59417..67c9d31243 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-1.5b-instruct
label:
en_US: qwen2.5-1.5b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
index d8f53666ce..2a38be921c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-14b-instruct
label:
en_US: qwen2.5-14b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
index 890f7e6e4e..e6e4fbf978 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-32b-instruct
label:
en_US: qwen2.5-32b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
index 6d3d2dd5bb..8f250379a7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-3b-instruct
label:
en_US: qwen2.5-3b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
index 17d0eb5b35..bb3cdd6141 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-72b-instruct
label:
en_US: qwen2.5-72b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
index 435b3f90a2..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
index 435b3f90a2..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
@@ -60,16 +61,11 @@ parameter_rules:
type: float
default: 1.1
label:
+ zh_Hans: 重复惩罚
en_US: Repetition penalty
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
- - name: enable_search
- type: boolean
- default: false
- help:
- zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
- en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
- name: response_format
use_template: response_format
pricing:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
index f4303c53d3..52e35d8b50 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v1
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
index f6be3544ed..5bb6a8f424 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v2
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
index 171a379ee2..d8af0e2b63 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v3
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
index 5783d2e383..736cd44df8 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
import dashscope
import numpy as np
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage,
@@ -27,6 +28,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
credentials: dict,
texts: list[str],
user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +37,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
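Across the embedding providers touched in this diff, `_invoke` gains an `input_type` keyword defaulting to `EmbeddingInputType.DOCUMENT`. A minimal sketch of the resulting calling convention, assuming (as the import path `core.embedding.embedding_constant` suggests) that the enum distinguishes `DOCUMENT` from `QUERY` inputs; the enum and function below are illustrative stand-ins, not code from the repository:

```python
from enum import Enum
from typing import Optional


class EmbeddingInputType(Enum):
    """Stand-in for core.embedding.embedding_constant.EmbeddingInputType (assumed members)."""

    DOCUMENT = "document"
    QUERY = "query"


def invoke_embedding(
    texts: list[str],
    user: Optional[str] = None,
    input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> dict:
    # Providers that support asymmetric embeddings can branch on input_type;
    # the DOCUMENT default keeps existing indexing callers unchanged.
    return {"texts": texts, "input_type": input_type.value, "user": user}


# Indexing uses the default; retrieval opts into QUERY explicitly.
print(invoke_embedding(["some document chunk"]))
print(invoke_embedding(["what is dify?"], input_type=EmbeddingInputType.QUERY))
```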
diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
index fabe6d90e6..1a09c20fd9 100644
--- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
@@ -37,14 +37,51 @@ model_credential_schema:
en_US: Model Name
zh_Hans: 模型名称
placeholder:
- en_US: Enter full model name
- zh_Hans: 输入模型全称
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
credential_form_schemas:
- variable: dashscope_api_key
- required: true
label:
en_US: API Key
type: secret-input
+ required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
+ - variable: context_size
+ label:
+ zh_Hans: 模型上下文长度
+ en_US: Model context size
+ required: true
+ type: text-input
+ default: '4096'
+ placeholder:
+ zh_Hans: 在此输入您的模型上下文长度
+ en_US: Enter your model context size
+ - variable: max_tokens
+ label:
+ zh_Hans: 最大 token 上限
+ en_US: Upper bound for max tokens
+ default: '4096'
+ type: text-input
+ show_on:
+ - variable: __model_type
+ value: llm
+ - variable: function_calling_type
+ label:
+ en_US: Function calling
+ type: select
+ required: false
+ default: no_call
+ options:
+ - value: no_call
+ label:
+ en_US: Not Supported
+ zh_Hans: 不支持
+ - value: function_call
+ label:
+ en_US: Supported
+ zh_Hans: 支持
+ show_on:
+ - variable: __model_type
+ value: llm
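The new customizable-model fields above are text inputs, so values such as context_size and max_tokens arrive as strings like '4096'. A hedged sketch of how consuming code might coerce them to integers; `int_credential` is a hypothetical helper for illustration, not part of the codebase:

```python
def int_credential(credentials: dict, key: str, default: int) -> int:
    """Hypothetical helper: text-input credentials arrive as strings like '4096'."""
    raw = credentials.get(key, default)
    try:
        return int(raw)
    except (TypeError, ValueError):
        return default


credentials = {"context_size": "4096", "max_tokens": "4096", "function_calling_type": "no_call"}
context_size = int_credential(credentials, "context_size", 4096)
max_tokens = int_credential(credentials, "max_tokens", 4096)
# The select field gates tool use on the 'function_call' option defined above.
supports_tools = credentials.get("function_calling_type") == "function_call"
print(context_size, max_tokens, supports_tools)
```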
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
index edd4a36d98..b6509cd26c 100644
--- a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
from openai import OpenAI
from tokenizers import Tokenizer
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -22,7 +23,14 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
def _get_tokenizer(self) -> Tokenizer:
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
- def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: str | None = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -30,6 +38,7 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
similarity index 96%
rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml
rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
index c308f0a322..f5386be06d 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-001
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..97bd44f06b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
similarity index 96%
rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml
rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
index 744863e773..5e08f2294e 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-001
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..8f327ea2f3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml
new file mode 100644
index 0000000000..0f5eb34c0c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-flash-experimental
+label:
+ en_US: Gemini Flash Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml
new file mode 100644
index 0000000000..fa31cabb85
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-pro-experimental
+label:
+ en_US: Gemini Pro Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
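The new Gemini entries price both input and output at 0.00 with unit '0.000001'. Assuming the usual convention that cost = tokens x price x unit (which makes price a per-million-token rate), a quick arithmetic check that the experimental models cost nothing:

```python
from decimal import Decimal


def token_cost(tokens: int, price: str, unit: str) -> Decimal:
    # Assumed convention: `unit` scales `price` down to a per-token rate,
    # so unit '0.000001' means `price` is quoted per million tokens.
    return Decimal(tokens) * Decimal(price) * Decimal(unit)


# Filling the full 1,048,576-token context of the entries above costs 0.00 USD.
print(token_cost(1_048_576, "0.00", "0.000001"))
```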
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
index da69b7cdf3..1dd785d545 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
@@ -2,6 +2,7 @@ import base64
import io
import json
import logging
+import time
from collections.abc import Generator
from typing import Optional, Union, cast
@@ -20,7 +21,6 @@ from google.api_core import exceptions
from google.cloud import aiplatform
from google.oauth2 import service_account
from PIL import Image
-from vertexai.generative_models import HarmBlockThreshold, HarmCategory
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
@@ -34,6 +34,7 @@ from core.model_runtime.entities.message_entities import (
ToolPromptMessage,
UserPromptMessage,
)
+from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
@@ -503,20 +504,12 @@ class VertexAiLargeLanguageModel(LargeLanguageModel):
else:
history.append(content)
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
google_model = glm.GenerativeModel(model_name=model, system_instruction=system_instruction)
response = google_model.generate_content(
contents=history,
generation_config=glm.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
)
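With the BLOCK_NONE safety_settings removed, generation requests fall back to the Vertex AI service's default safety thresholds rather than disabling them. A minimal sketch of the resulting call shape, assuming the generative-models module is imported as glm as in the surrounding file:

```python
import vertexai.generative_models as glm


def generate(model: str, history: list, config_kwargs: dict, stream: bool = True):
    # No explicit safety_settings any more: the request now relies on the
    # Vertex AI service defaults instead of forcing BLOCK_NONE everywhere.
    google_model = glm.GenerativeModel(model_name=model)
    return google_model.generate_content(
        contents=history,
        generation_config=glm.GenerationConfig(**config_kwargs),
        stream=stream,
    )
```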
diff --git a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
index 519373a7f3..fce9544df0 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
@@ -9,6 +9,7 @@ from google.cloud import aiplatform
from google.oauth2 import service_account
from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -30,7 +31,12 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -38,6 +44,8 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
index 9cba2cb879..0dd4037c95 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
@@ -2,6 +2,7 @@ import time
from decimal import Decimal
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -41,7 +42,12 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -50,6 +56,7 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if ArkClientV3.is_legacy(credentials):
diff --git a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
index 4d6f6dccd0..c21d0c0552 100644
--- a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from typing import Any, Optional
import numpy as np
from requests import Response, post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeError
@@ -70,7 +71,12 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel):
return WenxinTextEmbedding(api_key, secret_key)
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -79,6 +85,7 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
index 8043af1d6c..1627239132 100644
--- a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
@@ -3,6 +3,7 @@ from typing import Optional
from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -25,7 +26,12 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -40,6 +46,7 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
index b1f9b7485c..7fcf692202 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.1'
output: '0.1'
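The same web_search boolean is added to each GLM model below; at invocation time it has to be translated into something the ZhipuAI API understands. One plausible mapping, assuming ZhipuAI's web_search tool shape rather than a raw request kwarg; treat the exact payload as an assumption:

```python
def build_tools(model_parameters: dict) -> "list[dict] | None":
    # Assumption: the boolean `web_search` parameter rule is forwarded to
    # ZhipuAI as a web_search tool entry, not passed through verbatim.
    if model_parameters.pop("web_search", False):
        return [{"type": "web_search", "web_search": {"enable": True}}]
    return None


print(build_tools({"temperature": 0.7, "web_search": True}))
```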
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
index 4e7d5fd3cc..fcd7c7768c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
index 14f17db5d6..c9ae5abf19 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.01'
output: '0.01'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
index 3361474d73..98c4f72c72 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0'
output: '0'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
index bf0135d198..0b5391ce2f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 8192
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
index ab4b32dd82..62f453fb77 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.1'
output: '0.1'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
index d1b01731f5..350b080c3f 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
@@ -49,6 +49,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.001'
output: '0.001'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
index 9ede308f18..2d7ebd71cf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
@@ -46,6 +46,15 @@ parameter_rules:
default: 1024
min: 1
max: 4095
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.05'
output: '0.05'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
index 28286580a7..3a1120ff37 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
@@ -44,6 +44,15 @@ parameter_rules:
default: 1024
min: 1
max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.05'
output: '0.05'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
index 4c5fa24034..14b9623e5a 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
@@ -44,6 +44,15 @@ parameter_rules:
default: 1024
min: 1
max: 1024
+ - name: web_search
+ type: boolean
+ label:
+ zh_Hans: 联网搜索
+ en_US: Web Search
+ default: false
+ help:
+ zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
+ en_US: The model has a built-in Internet search service. This parameter controls whether the model consults Internet search results when generating text. When Internet search is enabled, the model uses the search results as reference information during generation, but it decides on its own, based on its internal logic, whether to actually use them.
pricing:
input: '0.01'
output: '0.01'
diff --git a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
index ee20954381..14a529dddf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
@@ -1,6 +1,7 @@
import time
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -15,7 +16,12 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -24,6 +30,7 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
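A short sketch of what the widened signature enables: callers can distinguish query-time from document-time embeddings. This assumes the `EmbeddingInputType` enum also defines `QUERY` alongside the `DOCUMENT` default, and that `model_instance` and `credentials` are configured elsewhere; `_invoke` is called directly only for illustration.

```python
# Hypothetical usage of the extended signature; model_instance, credentials,
# and EmbeddingInputType.QUERY are assumptions for illustration.
from core.embedding.embedding_constant import EmbeddingInputType

result = model_instance._invoke(
    model="embedding-2",
    credentials=credentials,
    texts=["what is dify?"],
    input_type=EmbeddingInputType.QUERY,  # defaults to DOCUMENT when omitted
)
```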
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
index d8ecc31064..05510a3ec4 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py
@@ -57,7 +57,7 @@ class AsyncCompletions(BaseAPI):
if temperature <= 0:
do_sample = False
temperature = 0.01
- # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501
+ # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperature不生效)") # noqa: E501
if temperature >= 1:
temperature = 0.99
# logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间")
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
index 1c23473a03..8e5bb454e6 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py
@@ -60,7 +60,7 @@ class Completions(BaseAPI):
if temperature <= 0:
do_sample = False
temperature = 0.01
- # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501
+ # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperature不生效)") # noqa: E501
if temperature >= 1:
temperature = 0.99
# logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间")
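Beyond the typo fix, the clamping these commented-out warnings describe is identical in the sync and async paths: ZhipuAI accepts `temperature` only in the open interval (0.0, 1.0), so out-of-range values are coerced. A minimal reproduction:

```python
# Minimal reproduction of the temperature clamping in completions.py and
# async_completions.py: values outside (0.0, 1.0) are coerced inward.
def clamp_temperature(temperature: float) -> tuple[bool, float]:
    do_sample = True
    if temperature <= 0:
        do_sample = False  # sampling disabled; top_p/temperature take no effect
        temperature = 0.01
    if temperature >= 1:
        temperature = 0.99
    return do_sample, temperature

assert clamp_temperature(0.0) == (False, 0.01)
assert clamp_temperature(1.5) == (True, 0.99)
```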
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
index 5e9a7e0a98..6d8ba700b7 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py
@@ -630,8 +630,7 @@ def validate_type(*, type_: type[_T], value: object) -> _T:
return cast(_T, _validate_non_model_type(type_=type_, value=value))
-# our use of subclasssing here causes weirdness for type checkers,
-# so we just pretend that we don't subclass
+# Subclassing here confuses type checkers, so we treat this class as non-inheriting.
if TYPE_CHECKING:
GenericModel = BaseModel
else:
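The reworded comment describes a common workaround: at type-check time the subclass is presented as a plain alias of its base so checkers treat the two identically, while at runtime the real subclass (with its extra behavior) is used. A generic, self-contained sketch with illustrative names:

```python
from typing import TYPE_CHECKING

class BaseThing:
    pass

if TYPE_CHECKING:
    # Type checkers see a plain alias, sidestepping subclassing quirks.
    DerivedThing = BaseThing
else:
    # At runtime the real subclass, with extra behavior, is used.
    class DerivedThing(BaseThing):
        runtime_only = True
```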
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
index d0f933d814..ffdafb85d5 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py
@@ -169,7 +169,7 @@ class BaseSyncPage(BasePage[_T], Generic[_T]):
# Pydantic uses a custom `__iter__` method to support casting BaseModels
# to dictionaries. e.g. dict(model).
# As we want to support `for item in page`, this is inherently incompatible
- # with the default pydantic behaviour. It is not possible to support both
+ # with the default pydantic behavior. It is not possible to support both
# use cases at once. Fortunately, this is not a big deal as all other pydantic
# methods should continue to work as expected as there is an alternative method
# to cast a model to a dictionary, model.dict(), which is used internally
@@ -356,16 +356,16 @@ class HttpClient:
**kwargs,
)
- def _object_to_formfata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]:
+ def _object_to_formdata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]:
items = []
if isinstance(value, Mapping):
for k, v in value.items():
- items.extend(self._object_to_formfata(f"{key}[{k}]", v))
+ items.extend(self._object_to_formdata(f"{key}[{k}]", v))
return items
if isinstance(value, list | tuple):
for v in value:
- items.extend(self._object_to_formfata(key + "[]", v))
+ items.extend(self._object_to_formdata(key + "[]", v))
return items
def _primitive_value_to_str(val) -> str:
@@ -385,7 +385,7 @@ class HttpClient:
return [(key, str_data)]
def _make_multipartform(self, data: Mapping[object, object]) -> dict[str, object]:
- items = flatten(list(starmap(self._object_to_formfata, data.items())))
+ items = flatten(list(starmap(self._object_to_formdata, data.items())))
serialized: dict[str, object] = {}
for key, value in items:
@@ -620,7 +620,7 @@ class HttpClient:
stream: bool,
stream_cls: type[StreamResponse] | None,
) -> ResponseT:
- # _legacy_response with raw_response_header to paser method
+ # requests flagged with RAW_RESPONSE_HEADER are routed to the _legacy_response parser
if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
return cast(
ResponseT,
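The renamed `_object_to_formdata` encodes nested payloads using bracketed form keys: mappings become `key[subkey]` and sequences become `key[]`. A simplified standalone sketch of the convention (the real method also routes primitives through `_primitive_value_to_str`):

```python
# Simplified stand-in for _object_to_formdata's key convention.
from collections.abc import Mapping

def to_formdata(key, value):
    if isinstance(value, Mapping):
        items = []
        for k, v in value.items():
            items.extend(to_formdata(f"{key}[{k}]", v))  # key[subkey]
        return items
    if isinstance(value, (list, tuple)):
        items = []
        for v in value:
            items.extend(to_formdata(key + "[]", v))  # key[]
        return items
    return [(key, str(value))]

assert to_formdata("meta", {"tags": ["a", "b"]}) == [
    ("meta[tags][]", "a"),
    ("meta[tags][]", "b"),
]
```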
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
index 47183b9eee..51bf21bcdc 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py
@@ -87,7 +87,7 @@ class LegacyAPIResponse(Generic[R]):
For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
- You can customise the type that the response is parsed into through
+ You can customize the type that the response is parsed into through
the `to` argument, e.g.
```py
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
index 45443da662..92e6018055 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py
@@ -252,7 +252,7 @@ class APIResponse(BaseAPIResponse[R]):
For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
- You can customise the type that the response is parsed into through
+ You can customize the type that the response is parsed into through
the `to` argument, e.g.
```py
@@ -363,7 +363,7 @@ class StreamAlreadyConsumed(ZhipuAIError): # noqa: N818
# ^ error
```
- If you want this behaviour you'll need to either manually accumulate the response
+ If you want this behavior you'll need to either manually accumulate the response
content or call `await response.read()` before iterating over the stream.
"""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
index 32e23e6dab..59cb41d712 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py
@@ -1,8 +1,8 @@
-from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessinfo
+from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessInfo
__all__ = [
"DocumentData",
"DocumentObject",
- "DocumentSuccessinfo",
+ "DocumentSuccessInfo",
"DocumentFailedInfo",
]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
index b9a1646391..980bc6f4a7 100644
--- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py
@@ -2,10 +2,10 @@ from typing import Optional
from ....core import BaseModel
-__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessinfo", "DocumentFailedInfo"]
+__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessInfo", "DocumentFailedInfo"]
-class DocumentSuccessinfo(BaseModel):
+class DocumentSuccessInfo(BaseModel):
documentId: Optional[str] = None
"""文件id"""
filename: Optional[str] = None
@@ -24,7 +24,7 @@ class DocumentFailedInfo(BaseModel):
class DocumentObject(BaseModel):
"""文档信息"""
- successInfos: Optional[list[DocumentSuccessinfo]] = None
+ successInfos: Optional[list[DocumentSuccessInfo]] = None
"""上传成功的文件信息"""
failedInfos: Optional[list[DocumentFailedInfo]] = None
"""上传失败的文件信息"""
diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
index 612542dab1..6dcd98dcfd 100644
--- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
+++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
@@ -40,19 +40,8 @@ class AnalyticdbConfig(BaseModel):
class AnalyticdbVector(BaseVector):
- _instance = None
- _init = False
-
- def __new__(cls, *args, **kwargs):
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- return cls._instance
-
def __init__(self, collection_name: str, config: AnalyticdbConfig):
- # collection_name must be updated every time
self._collection_name = collection_name.lower()
- if AnalyticdbVector._init:
- return
try:
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models
@@ -62,7 +51,6 @@ class AnalyticdbVector(BaseVector):
self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params())
self._client = Client(self._client_config)
self._initialize()
- AnalyticdbVector._init = True
def _initialize(self) -> None:
cache_key = f"vector_indexing_{self.config.instance_id}"
@@ -257,11 +245,14 @@ class AnalyticdbVector(BaseVector):
documents = []
for match in response.body.matches.match:
if match.score > score_threshold:
+ metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
- metadata=json.loads(match.metadata.get("metadata_")),
+ metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
@@ -286,12 +277,14 @@ class AnalyticdbVector(BaseVector):
for match in response.body.matches.match:
if match.score > score_threshold:
metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
vector=match.metadata.get("vector"),
metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def delete(self) -> None:
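The retrieval changes copy each match's score into its metadata and return results ordered best-first. A minimal reproduction of the new ordering, with a stand-in for `core.rag`'s `Document`:

```python
# Scores are written into metadata (so downstream consumers can read them),
# then documents are sorted descending by score.
from dataclasses import dataclass, field

@dataclass
class Document:  # stand-in for core.rag.models.document.Document
    page_content: str
    metadata: dict = field(default_factory=dict)

matches = [("foo", 0.42), ("bar", 0.91), ("baz", 0.77)]
documents = [Document(text, {"score": score}) for text, score in matches]
documents = sorted(documents, key=lambda d: d.metadata["score"], reverse=True)
assert [d.metadata["score"] for d in documents] == [0.91, 0.77, 0.42]
```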
diff --git a/api/core/rag/datasource/vdb/vector_base.py b/api/core/rag/datasource/vdb/vector_base.py
index 1a0dc7f48b..22e191340d 100644
--- a/api/core/rag/datasource/vdb/vector_base.py
+++ b/api/core/rag/datasource/vdb/vector_base.py
@@ -45,6 +45,7 @@ class BaseVector(ABC):
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
raise NotImplementedError
+ @abstractmethod
def delete(self) -> None:
raise NotImplementedError
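Marking `delete` abstract changes when the error surfaces: a vector-store subclass that forgets to implement it now fails at instantiation instead of at first call. A small sketch with a simplified stand-in base class:

```python
from abc import ABC, abstractmethod

class BaseVector(ABC):  # simplified stand-in for the real class
    @abstractmethod
    def delete(self) -> None:
        raise NotImplementedError

class BrokenVector(BaseVector):
    pass  # forgot delete()

try:
    BrokenVector()
except TypeError as e:
    print(e)  # Can't instantiate abstract class BrokenVector ...
```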
diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py
index fe7eaa32e6..0ffc89b214 100644
--- a/api/core/rag/extractor/extract_processor.py
+++ b/api/core/rag/extractor/extract_processor.py
@@ -124,7 +124,7 @@ class ExtractProcessor:
extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
elif file_extension == ".xml":
extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url)
- elif file_extension == "epub":
+ elif file_extension == ".epub":
extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url)
else:
# txt
@@ -146,7 +146,7 @@ class ExtractProcessor:
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
elif file_extension == ".csv":
extractor = CSVExtractor(file_path, autodetect_encoding=True)
- elif file_extension == "epub":
+ elif file_extension == ".epub":
extractor = UnstructuredEpubExtractor(file_path)
else:
# txt
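The `.epub` fix matters because these branches compare extensions that keep their leading dot (as `pathlib.Path.suffix` produces them), so the bare `"epub"` literal could never match and EPUB files silently fell through to the plain-text branch:

```python
# Path.suffix keeps the leading dot, so "epub" never matched.
from pathlib import Path

file_extension = Path("book.epub").suffix
assert file_extension == ".epub"
assert file_extension != "epub"
```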
diff --git a/api/core/tools/entities/common_entities.py b/api/core/tools/entities/common_entities.py
index 37a926697b..924e6fc0cf 100644
--- a/api/core/tools/entities/common_entities.py
+++ b/api/core/tools/entities/common_entities.py
@@ -1,6 +1,6 @@
from typing import Optional
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
class I18nObject(BaseModel):
@@ -8,16 +8,16 @@ class I18nObject(BaseModel):
Model class for i18n object.
"""
- zh_Hans: Optional[str] = None
- pt_BR: Optional[str] = None
en_US: str
+ zh_Hans: Optional[str] = Field(default=None)
+ pt_BR: Optional[str] = Field(default=None)
+ ja_JP: Optional[str] = Field(default=None)
def __init__(self, **data):
super().__init__(**data)
- if not self.zh_Hans:
- self.zh_Hans = self.en_US
- if not self.pt_BR:
- self.pt_BR = self.en_US
+ self.zh_Hans = self.zh_Hans or self.en_US
+ self.pt_BR = self.pt_BR or self.en_US
+ self.ja_JP = self.ja_JP or self.en_US
def to_dict(self) -> dict:
- return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR}
+ return {"zh_Hans": self.zh_Hans, "en_US": self.en_US, "pt_BR": self.pt_BR, "ja_JP": self.ja_JP}
diff --git a/api/core/tools/provider/builtin/arxiv/arxiv.yaml b/api/core/tools/provider/builtin/arxiv/arxiv.yaml
index d26993b336..25aec97bb7 100644
--- a/api/core/tools/provider/builtin/arxiv/arxiv.yaml
+++ b/api/core/tools/provider/builtin/arxiv/arxiv.yaml
@@ -4,9 +4,11 @@ identity:
label:
en_US: ArXiv
zh_Hans: ArXiv
+ ja_JP: ArXiv
description:
en_US: Access to a vast repository of scientific papers and articles in various fields of research.
zh_Hans: 访问各个研究领域大量科学论文和文章的存储库。
+ ja_JP: 多様な研究分野の科学論文や記事の膨大なリポジトリへのアクセス。
icon: icon.svg
tags:
- search
diff --git a/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml b/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
index 7439a48658..afc1925df3 100644
--- a/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
+++ b/api/core/tools/provider/builtin/arxiv/tools/arxiv_search.yaml
@@ -4,10 +4,12 @@ identity:
label:
en_US: Arxiv Search
zh_Hans: Arxiv 搜索
+ ja_JP: Arxiv 検索
description:
human:
en_US: A tool for searching scientific papers and articles from the Arxiv repository. Input can be an Arxiv ID or an author's name.
zh_Hans: 一个用于从Arxiv存储库搜索科学论文和文章的工具。 输入可以是Arxiv ID或作者姓名。
+ ja_JP: Arxivリポジトリから科学論文や記事を検索するためのツールです。入力はArxiv IDまたは著者名にすることができます。
llm: A tool for searching scientific papers and articles from the Arxiv repository. Input can be an Arxiv ID or an author's name.
parameters:
- name: query
@@ -16,8 +18,10 @@ parameters:
label:
en_US: Query string
zh_Hans: 查询字符串
+ ja_JP: クエリ文字列
human_description:
en_US: The Arxiv ID or author's name used for searching.
zh_Hans: 用于搜索的Arxiv ID或作者姓名。
+ ja_JP: 検索に使用されるArxiv IDまたは著者名。
llm_description: The Arxiv ID or author's name used for searching.
form: llm
diff --git a/api/core/tools/provider/builtin/comfyui/comfyui.yaml b/api/core/tools/provider/builtin/comfyui/comfyui.yaml
index 066fd85308..3891eebf3a 100644
--- a/api/core/tools/provider/builtin/comfyui/comfyui.yaml
+++ b/api/core/tools/provider/builtin/comfyui/comfyui.yaml
@@ -39,4 +39,4 @@ credentials_for_provider:
en_US: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors
zh_Hans: ComfyUI服务器的模型名称, 比如 xxx.safetensors
pt_BR: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors
- url: https://docs.dify.ai/tutorials/tool-configuration/comfyui
+ url: https://github.com/comfyanonymous/ComfyUI#installing
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/crawl.py b/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
index 9675b8eb91..15ab510c6c 100644
--- a/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
+++ b/api/core/tools/provider/builtin/firecrawl/tools/crawl.py
@@ -35,10 +35,10 @@ class CrawlTool(BuiltinTool):
scrapeOptions["excludeTags"] = get_array_params(tool_parameters, "excludeTags")
scrapeOptions["onlyMainContent"] = tool_parameters.get("onlyMainContent", False)
scrapeOptions["waitFor"] = tool_parameters.get("waitFor", 0)
- scrapeOptions = {k: v for k, v in scrapeOptions.items() if v not in {None, ""}}
+ scrapeOptions = {k: v for k, v in scrapeOptions.items() if v not in (None, "")}
payload["scrapeOptions"] = scrapeOptions or None
- payload = {k: v for k, v in payload.items() if v not in {None, ""}}
+ payload = {k: v for k, v in payload.items() if v not in (None, "")}
crawl_result = app.crawl_url(url=tool_parameters["url"], wait=wait_for_results, **payload)
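Switching the membership test from a set literal to a tuple (here and in scrape.py below) is a correctness fix, not style: these payload values can be unhashable (lists such as `excludeTags`, dicts such as `scrapeOptions`), and `v in {None, ""}` must hash `v`, raising `TypeError` for them, while a tuple compares by equality:

```python
# A set membership test hashes the candidate; unhashable values blow up.
payload = {"excludeTags": ["nav", "footer"], "waitFor": 0, "prompt": None}

cleaned = {k: v for k, v in payload.items() if v not in (None, "")}
assert cleaned == {"excludeTags": ["nav", "footer"], "waitFor": 0}

try:
    {k: v for k, v in payload.items() if v not in {None, ""}}
except TypeError as e:
    print(e)  # unhashable type: 'list'
```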
diff --git a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
index 538b4a1fcb..f00a9b31ce 100644
--- a/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
+++ b/api/core/tools/provider/builtin/firecrawl/tools/scrape.py
@@ -29,10 +29,10 @@ class ScrapeTool(BuiltinTool):
extract["schema"] = get_json_params(tool_parameters, "schema")
extract["systemPrompt"] = tool_parameters.get("systemPrompt")
extract["prompt"] = tool_parameters.get("prompt")
- extract = {k: v for k, v in extract.items() if v not in {None, ""}}
+ extract = {k: v for k, v in extract.items() if v not in (None, "")}
payload["extract"] = extract or None
- payload = {k: v for k, v in payload.items() if v not in {None, ""}}
+ payload = {k: v for k, v in payload.items() if v not in (None, "")}
crawl_result = app.scrape_url(url=tool_parameters["url"], **payload)
markdown_result = crawl_result.get("data", {}).get("markdown", "")
diff --git a/api/core/tools/provider/builtin/jina/jina.yaml b/api/core/tools/provider/builtin/jina/jina.yaml
index 06f23382d9..346175c41f 100644
--- a/api/core/tools/provider/builtin/jina/jina.yaml
+++ b/api/core/tools/provider/builtin/jina/jina.yaml
@@ -2,9 +2,9 @@ identity:
author: Dify
name: jina
label:
- en_US: Jina
- zh_Hans: Jina
- pt_BR: Jina
+ en_US: Jina AI
+ zh_Hans: Jina AI
+ pt_BR: Jina AI
description:
en_US: Convert any URL to an LLM-friendly input or perform searches on the web for grounding information. Experience improved output for your agent and RAG systems at no cost.
zh_Hans: 将任何URL转换为LLM易读的输入或在网页上搜索引擎上搜索引擎。
@@ -22,11 +22,11 @@ credentials_for_provider:
zh_Hans: API 密钥(可留空)
pt_BR: Chave API (deixe vazio se você não tiver uma)
placeholder:
- en_US: Please enter your Jina API key
- zh_Hans: 请输入你的 Jina API 密钥
- pt_BR: Por favor, insira sua chave de API do Jina
+ en_US: Please enter your Jina AI API key
+ zh_Hans: 请输入你的 Jina AI API 密钥
+ pt_BR: Por favor, insira sua chave de API do Jina AI
help:
- en_US: Get your Jina API key from Jina (optional, but you can get a higher rate)
- zh_Hans: 从 Jina 获取您的 Jina API 密钥(非必须,能得到更高的速率)
- pt_BR: Obtenha sua chave de API do Jina na Jina (opcional, mas você pode obter uma taxa mais alta)
+ en_US: Get your Jina AI API key from Jina AI (optional, but you can get a higher rate)
+ zh_Hans: 从 Jina AI 获取您的 Jina AI API 密钥(非必须,能得到更高的速率)
+ pt_BR: Obtenha sua chave de API do Jina AI na Jina AI (opcional, mas você pode obter uma taxa mais alta)
url: https://jina.ai
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
index 58ad6d8694..589bc3433d 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
@@ -2,14 +2,14 @@ identity:
name: jina_reader
author: Dify
label:
- en_US: JinaReader
- zh_Hans: JinaReader
- pt_BR: JinaReader
+ en_US: Fetch Single Page
+ zh_Hans: 获取单页面
+ pt_BR: Fetch Single Page
description:
human:
- en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
- zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
- pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+ en_US: Fetch the target URL (can be a PDF) and convert it into LLM-friendly markdown.
+ zh_Hans: 获取目标网址(可以是 PDF),并将其转换为适合大模型处理的 Markdown 格式。
+ pt_BR: Busque a URL de destino (que pode ser um PDF) e converta em um Markdown LLM-friendly.
llm: A tool for scraping webpages. Input should be a URL.
parameters:
- name: url
@@ -17,13 +17,13 @@ parameters:
required: true
label:
en_US: URL
- zh_Hans: 网页链接
+ zh_Hans: 网址
pt_BR: URL
human_description:
- en_US: used for linking to webpages
- zh_Hans: 用于链接到网页
- pt_BR: used for linking to webpages
- llm_description: url for scraping
+ en_US: Web link
+ zh_Hans: 网页链接
+ pt_BR: URL da web
+ llm_description: URL of the webpage to scrape
form: llm
- name: request_params
type: string
@@ -31,14 +31,14 @@ parameters:
label:
en_US: Request params
zh_Hans: 请求参数
- pt_BR: Request params
+ pt_BR: Parâmetros de solicitação
human_description:
en_US: |
request parameters, format: {"key1": "value1", "key2": "value2"}
zh_Hans: |
请求参数,格式:{"key1": "value1", "key2": "value2"}
pt_BR: |
- request parameters, format: {"key1": "value1", "key2": "value2"}
+ parâmetros de solicitação, formato: {"key1": "value1", "key2": "value2"}
llm_description: request parameters
form: llm
- name: target_selector
@@ -51,7 +51,7 @@ parameters:
human_description:
en_US: css selector for scraping specific elements
zh_Hans: css 选择器用于抓取特定元素
- pt_BR: css selector for scraping specific elements
+ pt_BR: css selector para scraping de elementos específicos
llm_description: css selector of the target element to scrape
form: form
- name: wait_for_selector
@@ -64,7 +64,7 @@ parameters:
human_description:
en_US: css selector for waiting for specific elements
zh_Hans: css 选择器用于等待特定元素
- pt_BR: css selector for waiting for specific elements
+ pt_BR: css selector para aguardar elementos específicos
llm_description: css selector of the target element to wait for
form: form
- name: image_caption
@@ -77,8 +77,8 @@ parameters:
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
- pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+ zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。"
+ pt_BR: "Adiciona legendas a todas as imagens na URL especificada, adicionando 'Imagem [idx]: [legenda]' como uma tag alt para aquelas que não têm uma. Isso permite que os modelos LLM inferiores interajam com as imagens em atividades como raciocínio e resumo."
llm_description: Captions all images at the specified URL
form: form
- name: gather_all_links_at_the_end
@@ -91,8 +91,8 @@ parameters:
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
- zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
- pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
+ zh_Hans: 末尾将添加“按钮和链接”部分,方便下游模型或网络代理做页面导航或执行进一步操作。
+ pt_BR: Um "Botões & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
@@ -105,8 +105,8 @@ parameters:
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
- zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
- pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+ zh_Hans: 末尾会新增“图片”部分,方便下游模型全面了解页面的视觉内容,提升推理效果。
+ pt_BR: Um "Imagens" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
llm_description: Gather all images at the end
form: form
- name: proxy_server
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
index 2bc70e1be1..e58c639e56 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
@@ -2,13 +2,14 @@ identity:
name: jina_search
author: Dify
label:
- en_US: JinaSearch
- zh_Hans: JinaSearch
- pt_BR: JinaSearch
+ en_US: Search the web
+ zh_Hans: 联网搜索
+ pt_BR: Search the web
description:
human:
- en_US: Search on the web and get the top 5 results. Useful for grounding using information from the web.
- zh_Hans: 在网络上搜索返回前 5 个结果。
+ en_US: Search the public web for a given query and return the top results as LLM-friendly markdown.
+ zh_Hans: 针对给定的查询在互联网上进行搜索,并以适合大模型处理的 Markdown 格式返回最相关的结果。
+ pt_BR: Pesquisa na web pública por uma consulta fornecida e retorna os melhores resultados em markdown amigável para LLMs.
llm: A tool for searching results on the web for grounding. Input should be a simple question.
parameters:
- name: query
@@ -16,11 +17,13 @@ parameters:
required: true
label:
en_US: Question (Query)
- zh_Hans: 信息查询
+ zh_Hans: 查询
+ pt_BR: Pergunta (Consulta)
human_description:
en_US: used to find information on the web
zh_Hans: 在网络上搜索信息
- llm_description: simple question to ask on the web
+ pt_BR: Usado para encontrar informações na web
+ llm_description: A simple question to ask on the web
form: llm
- name: image_caption
type: boolean
@@ -32,7 +35,7 @@ parameters:
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
+ zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。"
pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
llm_description: Captions all images at the specified URL
form: form
@@ -46,8 +49,8 @@ parameters:
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
- zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
- pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
+ zh_Hans: 末尾将添加“按钮和链接”部分,汇总页面上的所有链接。方便下游模型或网络代理做页面导航或执行进一步操作。
+ pt_BR: Um "Botão & Links" seção será criada no final. Isso ajuda os LLMs ou agentes da web navegando pela página ou executar ações adicionais.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
@@ -60,8 +63,8 @@ parameters:
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
- zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
- pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+ zh_Hans: 末尾会新增“图片”部分,汇总页面上的所有图片。方便下游模型概览页面的视觉内容,提升推理效果。
+ pt_BR: Um "Imagens" seção será criada no final. Isso fornece uma visão geral de todas as imagens na página para os LLMs, que pode melhorar a razão.
llm_description: Gather all images at the end
form: form
- name: proxy_server
@@ -74,7 +77,7 @@ parameters:
human_description:
en_US: Use proxy to access URLs
zh_Hans: 利用代理访问 URL
- pt_BR: Use proxy to access URLs
+ pt_BR: Usar proxy para acessar URLs
llm_description: Use proxy to access URLs
form: form
- name: no_cache
@@ -83,7 +86,7 @@ parameters:
default: false
label:
en_US: Bypass the Cache
- zh_Hans: 绕过缓存
+ zh_Hans: 是否绕过缓存
pt_BR: Ignorar o cache
human_description:
en_US: Bypass the Cache
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
index 62a5c7e7ba..74885cdf9a 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
@@ -2,11 +2,14 @@ identity:
name: jina_tokenizer
author: hjlarry
label:
- en_US: JinaTokenizer
+ en_US: Segment
+ zh_Hans: 切分器
+ pt_BR: Segment
description:
human:
- en_US: Free API to tokenize text and segment long text into chunks.
- zh_Hans: 免费的API可以将文本tokenize,也可以将长文本分割成多个部分。
+ en_US: Split long text into chunks and do tokenization.
+ zh_Hans: 将长文本拆分成小段落,并做分词处理。
+ pt_BR: Dividir o texto longo em pedaços e fazer tokenização.
llm: Free API to tokenize text and segment long text into chunks.
parameters:
- name: content
@@ -15,6 +18,7 @@ parameters:
label:
en_US: Content
zh_Hans: 内容
+ pt_BR: Conteúdo
llm_description: the content which need to tokenize or segment
form: llm
- name: return_tokens
@@ -23,18 +27,22 @@ parameters:
label:
en_US: Return the tokens
zh_Hans: 是否返回tokens
+ pt_BR: Retornar os tokens
human_description:
en_US: Return the tokens and their corresponding ids in the response.
zh_Hans: 返回tokens及其对应的ids。
+ pt_BR: Retornar os tokens e seus respectivos ids na resposta.
form: form
- name: return_chunks
type: boolean
label:
en_US: Return the chunks
zh_Hans: 是否分块
+ pt_BR: Retornar os chunks
human_description:
en_US: Chunking the input into semantically meaningful segments while handling a wide variety of text types and edge cases based on common structural cues.
- zh_Hans: 将输入分块为具有语义意义的片段,同时根据常见的结构线索处理各种文本类型和边缘情况。
+ zh_Hans: 将输入文本分块为语义有意义的片段,同时基于常见的结构线索处理各种文本类型和特殊情况。
+ pt_BR: Dividir o texto de entrada em segmentos semanticamente significativos, enquanto lida com uma ampla variedade de tipos de texto e casos de borda com base em pistas estruturais comuns.
form: form
- name: tokenizer
type: select
diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.py b/api/core/tools/provider/builtin/stepfun/stepfun.py
index b24f730c95..239db85b11 100644
--- a/api/core/tools/provider/builtin/stepfun/stepfun.py
+++ b/api/core/tools/provider/builtin/stepfun/stepfun.py
@@ -16,7 +16,7 @@ class StepfunProvider(BuiltinToolProviderController):
user_id="",
tool_parameters={
"prompt": "cute girl, blue eyes, white hair, anime style",
- "size": "1024x1024",
+ "size": "256x256",
"n": 1,
},
)
diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.yaml b/api/core/tools/provider/builtin/stepfun/stepfun.yaml
index 1f841ec369..e8139a4d7d 100644
--- a/api/core/tools/provider/builtin/stepfun/stepfun.yaml
+++ b/api/core/tools/provider/builtin/stepfun/stepfun.yaml
@@ -4,11 +4,9 @@ identity:
label:
en_US: Image-1X
zh_Hans: 阶跃星辰绘画
- pt_BR: Image-1X
description:
en_US: Image-1X
zh_Hans: 阶跃星辰绘画
- pt_BR: Image-1X
icon: icon.png
tags:
- image
@@ -20,27 +18,16 @@ credentials_for_provider:
label:
en_US: Stepfun API key
zh_Hans: 阶跃星辰API key
- pt_BR: Stepfun API key
- help:
- en_US: Please input your stepfun API key
- zh_Hans: 请输入你的阶跃星辰 API key
- pt_BR: Please input your stepfun API key
placeholder:
- en_US: Please input your stepfun API key
+ en_US: Please input your Stepfun API key
zh_Hans: 请输入你的阶跃星辰 API key
- pt_BR: Please input your stepfun API key
+ url: https://platform.stepfun.com/interface-key
stepfun_base_url:
type: text-input
required: false
label:
en_US: Stepfun base URL
zh_Hans: 阶跃星辰 base URL
- pt_BR: Stepfun base URL
- help:
- en_US: Please input your Stepfun base URL
- zh_Hans: 请输入你的阶跃星辰 base URL
- pt_BR: Please input your Stepfun base URL
placeholder:
en_US: Please input your Stepfun base URL
zh_Hans: 请输入你的阶跃星辰 base URL
- pt_BR: Please input your Stepfun base URL
diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.py b/api/core/tools/provider/builtin/stepfun/tools/image.py
index 0b92b122bf..eb55dae518 100644
--- a/api/core/tools/provider/builtin/stepfun/tools/image.py
+++ b/api/core/tools/provider/builtin/stepfun/tools/image.py
@@ -1,4 +1,3 @@
-import random
from typing import Any, Union
from openai import OpenAI
@@ -19,7 +18,7 @@ class StepfunTool(BuiltinTool):
"""
invoke tools
"""
- base_url = self.runtime.credentials.get("stepfun_base_url", "https://api.stepfun.com")
+ base_url = self.runtime.credentials.get("stepfun_base_url") or "https://api.stepfun.com"
base_url = str(URL(base_url) / "v1")
client = OpenAI(
@@ -28,9 +27,7 @@ class StepfunTool(BuiltinTool):
)
extra_body = {}
- model = tool_parameters.get("model", "step-1x-medium")
- if not model:
- return self.create_text_message("Please input model name")
+ model = "step-1x-medium"
# prompt
prompt = tool_parameters.get("prompt", "")
if not prompt:
@@ -67,9 +64,3 @@ class StepfunTool(BuiltinTool):
)
)
return result
-
- @staticmethod
- def _generate_random_id(length=8):
- characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
- random_id = "".join(random.choices(characters, k=length))
- return random_id
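The `or` fallback for `stepfun_base_url` earlier in this file is deliberate: `dict.get(key, default)` only falls back when the key is missing, while credentials saved from a form can be present but empty. A minimal illustration:

```python
# dict.get's default only covers a missing key; an empty string slips through.
credentials = {"stepfun_base_url": ""}

assert credentials.get("stepfun_base_url", "https://api.stepfun.com") == ""
assert (credentials.get("stepfun_base_url") or "https://api.stepfun.com") == "https://api.stepfun.com"
```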
diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.yaml b/api/core/tools/provider/builtin/stepfun/tools/image.yaml
index dcc5bd2db2..8d7c9b6586 100644
--- a/api/core/tools/provider/builtin/stepfun/tools/image.yaml
+++ b/api/core/tools/provider/builtin/stepfun/tools/image.yaml
@@ -29,35 +29,6 @@ parameters:
pt_BR: Image prompt, you can check the official documentation of step-1x
llm_description: Image prompt of step-1x you should describe the image you want to generate as a list of words as possible as detailed
form: llm
- - name: model
- type: select
- required: false
- human_description:
- en_US: used for selecting the model name
- zh_Hans: 用于选择模型的名字
- pt_BR: used for selecting the model name
- label:
- en_US: Model Name
- zh_Hans: 模型名字
- pt_BR: Model Name
- form: form
- options:
- - value: step-1x-turbo
- label:
- en_US: turbo
- zh_Hans: turbo
- pt_BR: turbo
- - value: step-1x-medium
- label:
- en_US: medium
- zh_Hans: medium
- pt_BR: medium
- - value: step-1x-large
- label:
- en_US: large
- zh_Hans: large
- pt_BR: large
- default: step-1x-medium
- name: size
type: select
required: false
diff --git a/api/core/tools/provider/builtin/tavily/tavily.yaml b/api/core/tools/provider/builtin/tavily/tavily.yaml
index 7b25a81848..95820f4d18 100644
--- a/api/core/tools/provider/builtin/tavily/tavily.yaml
+++ b/api/core/tools/provider/builtin/tavily/tavily.yaml
@@ -28,4 +28,4 @@ credentials_for_provider:
en_US: Get your Tavily API key from Tavily
zh_Hans: 从 TavilyApi 获取您的 Tavily API key
pt_BR: Get your Tavily API key from Tavily
- url: https://docs.tavily.com/docs/tavily-api/introduction
+ url: https://docs.tavily.com/docs/welcome
diff --git a/api/core/tools/provider/builtin/xinference/_assets/icon.png b/api/core/tools/provider/builtin/xinference/_assets/icon.png
new file mode 100644
index 0000000000..e58cacbd12
Binary files /dev/null and b/api/core/tools/provider/builtin/xinference/_assets/icon.png differ
diff --git a/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py
new file mode 100644
index 0000000000..847f2730f2
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.py
@@ -0,0 +1,412 @@
+import io
+import json
+from base64 import b64decode, b64encode
+from copy import deepcopy
+from typing import Any, Union
+
+from httpx import get, post
+from PIL import Image
+from yarl import URL
+
+from core.tools.entities.common_entities import I18nObject
+from core.tools.entities.tool_entities import (
+ ToolInvokeMessage,
+ ToolParameter,
+ ToolParameterOption,
+)
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.tool.builtin_tool import BuiltinTool
+
+# All commented out parameters default to null
+DRAW_TEXT_OPTIONS = {
+ # Prompts
+ "prompt": "",
+ "negative_prompt": "",
+ # "styles": [],
+ # Seeds
+ "seed": -1,
+ "subseed": -1,
+ "subseed_strength": 0,
+ "seed_resize_from_h": -1,
+ "seed_resize_from_w": -1,
+ # Samplers
+ "sampler_name": "DPM++ 2M",
+ # "scheduler": "",
+ # "sampler_index": "Automatic",
+ # Latent Space Options
+ "batch_size": 1,
+ "n_iter": 1,
+ "steps": 10,
+ "cfg_scale": 7,
+ "width": 512,
+ "height": 512,
+ # "restore_faces": True,
+ # "tiling": True,
+ "do_not_save_samples": False,
+ "do_not_save_grid": False,
+ # "eta": 0,
+ # "denoising_strength": 0.75,
+ # "s_min_uncond": 0,
+ # "s_churn": 0,
+ # "s_tmax": 0,
+ # "s_tmin": 0,
+ # "s_noise": 0,
+ "override_settings": {},
+ "override_settings_restore_afterwards": True,
+ # Refinement Options
+ "refiner_checkpoint": "",
+ "refiner_switch_at": 0,
+ "disable_extra_networks": False,
+ # "firstpass_image": "",
+ # "comments": "",
+ # High-Resolution Options
+ "enable_hr": False,
+ "firstphase_width": 0,
+ "firstphase_height": 0,
+ "hr_scale": 2,
+ # "hr_upscaler": "",
+ "hr_second_pass_steps": 0,
+ "hr_resize_x": 0,
+ "hr_resize_y": 0,
+ # "hr_checkpoint_name": "",
+ # "hr_sampler_name": "",
+ # "hr_scheduler": "",
+ "hr_prompt": "",
+ "hr_negative_prompt": "",
+ # Task Options
+ # "force_task_id": "",
+ # Script Options
+ # "script_name": "",
+ "script_args": [],
+ # Output Options
+ "send_images": True,
+ "save_images": False,
+ "alwayson_scripts": {},
+ # "infotext": "",
+}
+
+
+class StableDiffusionTool(BuiltinTool):
+ def _invoke(
+ self, user_id: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ invoke tools
+ """
+ # base url
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return self.create_text_message("Please input base_url")
+
+ if tool_parameters.get("model"):
+ self.runtime.credentials["model"] = tool_parameters["model"]
+
+ model = self.runtime.credentials.get("model", None)
+ if not model:
+ return self.create_text_message("Please input model")
+
+ # set model
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "options")
+ response = post(
+ url,
+ json={"sd_model_checkpoint": model},
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ raise ToolProviderCredentialValidationError("Failed to set model; please ask the user to configure a valid model")
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(f"Failed to set model: {e}") from e
+
+ # get image id and image variable
+ image_id = tool_parameters.get("image_id", "")
+ image_variable = self.get_default_image_variable()
+ # Return text2img if there's no image ID or no image variable
+ if not image_id or not image_variable:
+ return self.text2img(base_url=base_url, tool_parameters=tool_parameters)
+
+ # Proceed with image-to-image generation
+ return self.img2img(base_url=base_url, tool_parameters=tool_parameters)
+
+ def validate_models(self):
+ """
+ validate models
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ raise ToolProviderCredentialValidationError("Please input base_url")
+ model = self.runtime.credentials.get("model", None)
+ if not model:
+ raise ToolProviderCredentialValidationError("Please input model")
+
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "sd-models")
+ response = get(url=api_url, timeout=10)
+ if response.status_code == 404:
+ # try draw a picture
+ self._invoke(
+ user_id="test",
+ tool_parameters={
+ "prompt": "a cat",
+ "width": 1024,
+ "height": 1024,
+ "steps": 1,
+ "lora": "",
+ },
+ )
+ elif response.status_code != 200:
+ raise ToolProviderCredentialValidationError("Failed to get models")
+ else:
+ models = [d["model_name"] for d in response.json()]
+ if len([d for d in models if d == model]) > 0:
+ return self.create_text_message(json.dumps(models))
+ else:
+ raise ToolProviderCredentialValidationError(f"model {model} does not exist")
+ except Exception as e:
+ raise ToolProviderCredentialValidationError(f"Failed to get models, {e}")
+
+ def get_sd_models(self) -> list[str]:
+ """
+ get sd models
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return []
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "sd-models")
+ response = get(url=api_url, timeout=120)
+ if response.status_code != 200:
+ return []
+ else:
+ return [d["model_name"] for d in response.json()]
+ except Exception:
+ return []
+
+ def get_sample_methods(self) -> list[str]:
+ """
+ get sample method
+ """
+ try:
+ base_url = self.runtime.credentials.get("base_url", None)
+ if not base_url:
+ return []
+ api_url = str(URL(base_url) / "sdapi" / "v1" / "samplers")
+ response = get(url=api_url, timeout=120)
+ if response.status_code != 200:
+ return []
+ else:
+ return [d["name"] for d in response.json()]
+ except Exception:
+ return []
+
+ def img2img(
+ self, base_url: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ generate image
+ """
+
+ # Fetch the binary data of the image
+ image_variable = self.get_default_image_variable()
+ image_binary = self.get_variable_file(image_variable.name)
+ if not image_binary:
+ return self.create_text_message("Image not found, please request user to generate image firstly.")
+
+ # Convert image to RGB and save as PNG
+ try:
+ with Image.open(io.BytesIO(image_binary)) as image, io.BytesIO() as buffer:
+ image.convert("RGB").save(buffer, format="PNG")
+ image_binary = buffer.getvalue()
+ except Exception as e:
+ return self.create_text_message(f"Failed to process the image: {str(e)}")
+
+ # copy draw options
+ draw_options = deepcopy(DRAW_TEXT_OPTIONS)
+ # set image options
+ model = tool_parameters.get("model", "")
+ draw_options_image = {
+ "init_images": [b64encode(image_binary).decode("utf-8")],
+ "denoising_strength": 0.9,
+ "restore_faces": False,
+ "script_args": [],
+ "override_settings": {"sd_model_checkpoint": model},
+ "resize_mode": 0,
+ "image_cfg_scale": 0,
+ # "mask": None,
+ "mask_blur_x": 4,
+ "mask_blur_y": 4,
+ "mask_blur": 0,
+ "mask_round": True,
+ "inpainting_fill": 0,
+ "inpaint_full_res": True,
+ "inpaint_full_res_padding": 0,
+ "inpainting_mask_invert": 0,
+ "initial_noise_multiplier": 0,
+ # "latent_mask": None,
+ "include_init_images": True,
+ }
+ # update key and values
+ draw_options.update(draw_options_image)
+ draw_options.update(tool_parameters)
+
+ # get prompt lora model
+ prompt = tool_parameters.get("prompt", "")
+ lora = tool_parameters.get("lora", "")
+ model = tool_parameters.get("model", "")
+ if lora:
+ draw_options["prompt"] = f"{lora},{prompt}"
+ else:
+ draw_options["prompt"] = prompt
+
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "img2img")
+ response = post(
+ url,
+ json=draw_options,
+ timeout=120,
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ return self.create_text_message("Failed to generate image")
+
+ image = response.json()["images"][0]
+
+ return self.create_blob_message(
+ blob=b64decode(image),
+ meta={"mime_type": "image/png"},
+ save_as=self.VariableKey.IMAGE.value,
+ )
+
+ except Exception as e:
+ return self.create_text_message("Failed to generate image")
+
+ def text2img(
+ self, base_url: str, tool_parameters: dict[str, Any]
+ ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+ """
+ generate image
+ """
+ # copy draw options
+ draw_options = deepcopy(DRAW_TEXT_OPTIONS)
+ draw_options.update(tool_parameters)
+ # get prompt lora model
+ prompt = tool_parameters.get("prompt", "")
+ lora = tool_parameters.get("lora", "")
+ model = tool_parameters.get("model", "")
+ if lora:
+ draw_options["prompt"] = f"{lora},{prompt}"
+ else:
+ draw_options["prompt"] = prompt
+ draw_options["override_settings"]["sd_model_checkpoint"] = model
+
+ try:
+ url = str(URL(base_url) / "sdapi" / "v1" / "txt2img")
+ response = post(
+ url,
+ json=draw_options,
+ timeout=120,
+ headers={"Authorization": f"Bearer {self.runtime.credentials['api_key']}"},
+ )
+ if response.status_code != 200:
+ return self.create_text_message("Failed to generate image")
+
+ image = response.json()["images"][0]
+
+ return self.create_blob_message(
+ blob=b64decode(image),
+ meta={"mime_type": "image/png"},
+ save_as=self.VariableKey.IMAGE.value,
+ )
+
+ except Exception as e:
+ return self.create_text_message("Failed to generate image")
+
+ def get_runtime_parameters(self) -> list[ToolParameter]:
+ parameters = [
+ ToolParameter(
+ name="prompt",
+ label=I18nObject(en_US="Prompt", zh_Hans="Prompt"),
+ human_description=I18nObject(
+ en_US="Image prompt, you can check the official documentation of Stable Diffusion",
+ zh_Hans="图像提示词,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.STRING,
+ form=ToolParameter.ToolParameterForm.LLM,
+ llm_description="Image prompt of Stable Diffusion, you should describe the image you want to generate"
+ " as a list of words as possible as detailed, the prompt must be written in English.",
+ required=True,
+ ),
+ ]
+ if len(self.list_default_image_variables()) != 0:
+ parameters.append(
+ ToolParameter(
+ name="image_id",
+ label=I18nObject(en_US="image_id", zh_Hans="image_id"),
+ human_description=I18nObject(
+ en_US="Image id of the image you want to generate based on, if you want to generate image based"
+ " on the default image, you can leave this field empty.",
+ zh_Hans="您想要生成的图像的图像 ID,如果您想要基于默认图像生成图像,则可以将此字段留空。",
+ ),
+ type=ToolParameter.ToolParameterType.STRING,
+ form=ToolParameter.ToolParameterForm.LLM,
+ llm_description="Image id of the original image, you can leave this field empty if you want to"
+ " generate a new image.",
+ required=True,
+ options=[
+ ToolParameterOption(value=i.name, label=I18nObject(en_US=i.name, zh_Hans=i.name))
+ for i in self.list_default_image_variables()
+ ],
+ )
+ )
+
+ if self.runtime.credentials:
+ try:
+ models = self.get_sd_models()
+ if len(models) != 0:
+ parameters.append(
+ ToolParameter(
+ name="model",
+ label=I18nObject(en_US="Model", zh_Hans="Model"),
+ human_description=I18nObject(
+ en_US="Model of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ zh_Hans="Stable Diffusion 的模型,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.SELECT,
+ form=ToolParameter.ToolParameterForm.FORM,
+ llm_description="Model of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ required=True,
+ default=models[0],
+ options=[
+ ToolParameterOption(value=i, label=I18nObject(en_US=i, zh_Hans=i)) for i in models
+ ],
+ )
+ )
+
+ except Exception:
+ pass
+
+ sample_methods = self.get_sample_methods()
+ if len(sample_methods) != 0:
+ parameters.append(
+ ToolParameter(
+ name="sampler_name",
+ label=I18nObject(en_US="Sampling method", zh_Hans="Sampling method"),
+ human_description=I18nObject(
+ en_US="Sampling method of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ zh_Hans="Stable Diffusion 的Sampling method,您可以查看 Stable Diffusion 的官方文档",
+ ),
+ type=ToolParameter.ToolParameterType.SELECT,
+ form=ToolParameter.ToolParameterForm.FORM,
+ llm_description="Sampling method of Stable Diffusion, you can check the official documentation"
+ " of Stable Diffusion",
+ required=True,
+ default=sample_methods[0],
+ options=[
+ ToolParameterOption(value=i, label=I18nObject(en_US=i, zh_Hans=i)) for i in sample_methods
+ ],
+ )
+ )
+ return parameters
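For reference, the tool above talks to a Stable Diffusion web-ui-compatible API served by Xinference. A standalone sketch of the same `txt2img` request it issues; the host, API key, and prompt are placeholders:

```python
# Standalone version of the txt2img request issued by StableDiffusionTool.
# base_url and the bearer token are placeholders for a real deployment.
from base64 import b64decode

from httpx import post
from yarl import URL

base_url = "http://localhost:9997"
url = str(URL(base_url) / "sdapi" / "v1" / "txt2img")
response = post(
    url,
    json={"prompt": "a cat", "steps": 10, "width": 512, "height": 512},
    timeout=120,
    headers={"Authorization": "Bearer sk-placeholder"},
)
if response.status_code == 200:
    png_bytes = b64decode(response.json()["images"][0])  # first image as PNG bytes
```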
diff --git a/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml
new file mode 100644
index 0000000000..4f1d17f175
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/tools/stable_diffusion.yaml
@@ -0,0 +1,87 @@
+identity:
+ name: stable_diffusion
+ author: xinference
+ label:
+ en_US: Stable Diffusion
+ zh_Hans: Stable Diffusion
+description:
+ human:
+ en_US: Generate images using Stable Diffusion models.
+ zh_Hans: 使用 Stable Diffusion 模型生成图片。
+ llm: draw the image you want based on your prompt.
+parameters:
+ - name: prompt
+ type: string
+ required: true
+ label:
+ en_US: Prompt
+ zh_Hans: 提示词
+ human_description:
+ en_US: Image prompt
+ zh_Hans: 图像提示词
+ llm_description: Image prompt of Stable Diffusion, you should describe the image you want to generate as a detailed list of words; the prompt must be written in English.
+ form: llm
+ - name: model
+ type: string
+ required: false
+ label:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ human_description:
+ en_US: Model Name
+ zh_Hans: 模型名称
+ form: form
+ - name: lora
+ type: string
+ required: false
+ label:
+ en_US: Lora
+ zh_Hans: Lora
+ human_description:
+ en_US: Lora
+ zh_Hans: Lora
+ form: form
+ - name: steps
+ type: number
+ required: false
+ label:
+ en_US: Steps
+ zh_Hans: 步数
+ human_description:
+ en_US: Steps
+ zh_Hans: 步数
+ form: form
+ default: 10
+ - name: width
+ type: number
+ required: false
+ label:
+ en_US: Width
+ zh_Hans: 宽度
+ human_description:
+ en_US: Width
+ zh_Hans: 宽度
+ form: form
+ default: 1024
+ - name: height
+ type: number
+ required: false
+ label:
+ en_US: Height
+ zh_Hans: 高度
+ human_description:
+ en_US: Height
+ zh_Hans: 高度
+ form: form
+ default: 1024
+ - name: negative_prompt
+ type: string
+ required: false
+ label:
+ en_US: Negative prompt
+ zh_Hans: 负面提示词
+ human_description:
+ en_US: Negative prompt
+ zh_Hans: 负面提示词
+ form: form
+ default: bad art, ugly, deformed, watermark, duplicated, discontinuous lines
diff --git a/api/core/tools/provider/builtin/xinference/xinference.py b/api/core/tools/provider/builtin/xinference/xinference.py
new file mode 100644
index 0000000000..7c2428cc00
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/xinference.py
@@ -0,0 +1,18 @@
+import requests
+
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class XinferenceProvider(BuiltinToolProviderController):
+ def _validate_credentials(self, credentials: dict) -> None:
+ base_url = credentials.get("base_url")
+ api_key = credentials.get("api_key")
+ model = credentials.get("model")
+ res = requests.post(
+ f"{base_url}/sdapi/v1/options",
+ headers={"Authorization": f"Bearer {api_key}"},
+ json={"sd_model_checkpoint": model},
+ )
+ if res.status_code != 200:
+ raise ToolProviderCredentialValidationError("Xinference API key is invalid")
diff --git a/api/core/tools/provider/builtin/xinference/xinference.yaml b/api/core/tools/provider/builtin/xinference/xinference.yaml
new file mode 100644
index 0000000000..19aaf5cbd1
--- /dev/null
+++ b/api/core/tools/provider/builtin/xinference/xinference.yaml
@@ -0,0 +1,40 @@
+identity:
+ author: xinference
+ name: xinference
+ label:
+ en_US: Xinference
+ zh_Hans: Xinference
+ description:
+ zh_Hans: Xinference 提供的兼容 Stable Diffusion web ui 的图片生成 API。
+ en_US: Stable Diffusion web ui compatible API provided by Xinference.
+ icon: icon.png
+ tags:
+ - image
+credentials_for_provider:
+ base_url:
+ type: secret-input
+ required: true
+ label:
+ en_US: Base URL
+ zh_Hans: Xinference 服务器的 Base URL
+ placeholder:
+ en_US: Please input Xinference server's Base URL
+ zh_Hans: 请输入 Xinference 服务器的 Base URL
+ model:
+ type: text-input
+ required: true
+ label:
+ en_US: Model
+ zh_Hans: 模型
+ placeholder:
+ en_US: Please input your model name
+ zh_Hans: 请输入你的模型名称
+ api_key:
+ type: secret-input
+ required: true
+ label:
+ en_US: API Key
+ zh_Hans: Xinference 服务器的 API Key
+ placeholder:
+ en_US: Please input Xinference server's API Key
+ zh_Hans: 请输入 Xinference 服务器的 API Key
diff --git a/api/core/tools/provider/builtin/youtube/youtube.py b/api/core/tools/provider/builtin/youtube/youtube.py
index aad876491c..07e430bcbf 100644
--- a/api/core/tools/provider/builtin/youtube/youtube.py
+++ b/api/core/tools/provider/builtin/youtube/youtube.py
@@ -13,7 +13,7 @@ class YahooFinanceProvider(BuiltinToolProviderController):
).invoke(
user_id="",
tool_parameters={
- "channel": "TOKYO GIRLS COLLECTION",
+ "channel": "UC2JZCsZSOudXA08cMMRCL9g",
"start_date": "2020-01-01",
"end_date": "2024-12-31",
},
diff --git a/api/core/tools/provider/tool_provider.py b/api/core/tools/provider/tool_provider.py
index 05c88b904e..321b212014 100644
--- a/api/core/tools/provider/tool_provider.py
+++ b/api/core/tools/provider/tool_provider.py
@@ -153,6 +153,9 @@ class ToolProviderController(BaseModel, ABC):
# check type
credential_schema = credentials_need_to_validate[credential_name]
+ if not credential_schema.required and credentials[credential_name] is None:
+ continue
+
if credential_schema.type in {
ToolProviderCredentials.CredentialsType.SECRET_INPUT,
ToolProviderCredentials.CredentialsType.TEXT_INPUT,
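The added guard lets optional credentials be omitted (left as `None`) without tripping the type check that follows. A condensed sketch of the control flow, using a stand-in schema object:

```python
# Optional credentials explicitly set to None are skipped before validation.
from dataclasses import dataclass

@dataclass
class Schema:  # stand-in for the real credential schema
    required: bool

schemas = {"api_key": Schema(required=True), "base_url": Schema(required=False)}
credentials = {"api_key": "sk-xxx", "base_url": None}

for name, value in credentials.items():
    if not schemas[name].required and value is None:
        continue  # optional and unset: nothing to validate
    assert isinstance(value, str)  # placeholder for the real type check
```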
diff --git a/api/core/workflow/nodes/end/end_stream_processor.py b/api/core/workflow/nodes/end/end_stream_processor.py
index 0366d7965d..1aecf863ac 100644
--- a/api/core/workflow/nodes/end/end_stream_processor.py
+++ b/api/core/workflow/nodes/end/end_stream_processor.py
@@ -22,8 +22,8 @@ class EndStreamProcessor(StreamProcessor):
for end_node_id, _ in self.end_stream_param.end_stream_variable_selector_mapping.items():
self.route_position[end_node_id] = 0
self.current_stream_chunk_generating_node_ids: dict[str, list[str]] = {}
- self.has_outputed = False
- self.outputed_node_ids = set()
+ self.has_output = False
+ self.output_node_ids = set()
def process(self, generator: Generator[GraphEngineEvent, None, None]) -> Generator[GraphEngineEvent, None, None]:
for event in generator:
@@ -34,11 +34,11 @@ class EndStreamProcessor(StreamProcessor):
yield event
elif isinstance(event, NodeRunStreamChunkEvent):
if event.in_iteration_id:
- if self.has_outputed and event.node_id not in self.outputed_node_ids:
+ if self.has_output and event.node_id not in self.output_node_ids:
event.chunk_content = "\n" + event.chunk_content
- self.outputed_node_ids.add(event.node_id)
- self.has_outputed = True
+ self.output_node_ids.add(event.node_id)
+ self.has_output = True
yield event
continue
@@ -53,11 +53,11 @@ class EndStreamProcessor(StreamProcessor):
)
if stream_out_end_node_ids:
- if self.has_outputed and event.node_id not in self.outputed_node_ids:
+ if self.has_output and event.node_id not in self.output_node_ids:
event.chunk_content = "\n" + event.chunk_content
- self.outputed_node_ids.add(event.node_id)
- self.has_outputed = True
+ self.output_node_ids.add(event.node_id)
+ self.has_output = True
yield event
elif isinstance(event, NodeRunSucceededEvent):
yield event
@@ -124,11 +124,11 @@ class EndStreamProcessor(StreamProcessor):
if text:
current_node_id = value_selector[0]
- if self.has_outputed and current_node_id not in self.outputed_node_ids:
+ if self.has_output and current_node_id not in self.output_node_ids:
text = "\n" + text
- self.outputed_node_ids.add(current_node_id)
- self.has_outputed = True
+ self.output_node_ids.add(current_node_id)
+ self.has_output = True
yield NodeRunStreamChunkEvent(
id=event.id,
node_id=event.node_id,
diff --git a/api/poetry.lock b/api/poetry.lock
index 78816683d8..bce21fb547 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -2333,13 +2333,13 @@ develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk",
[[package]]
name = "elasticsearch"
-version = "8.14.0"
+version = "8.15.1"
description = "Python client for Elasticsearch"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "elasticsearch-8.14.0-py3-none-any.whl", hash = "sha256:cef8ef70a81af027f3da74a4f7d9296b390c636903088439087b8262a468c130"},
- {file = "elasticsearch-8.14.0.tar.gz", hash = "sha256:aa2490029dd96f4015b333c1827aa21fd6c0a4d223b00dfb0fe933b8d09a511b"},
+ {file = "elasticsearch-8.15.1-py3-none-any.whl", hash = "sha256:02a0476e98768a30d7926335fc0d305c04fdb928eea1354c6e6040d8c2814569"},
+ {file = "elasticsearch-8.15.1.tar.gz", hash = "sha256:40c0d312f8adf8bdc81795bc16a0b546ddf544cb1f90e829a244e4780c4dbfd8"},
]
[package.dependencies]
@@ -2347,7 +2347,10 @@ elastic-transport = ">=8.13,<9"
[package.extras]
async = ["aiohttp (>=3,<4)"]
+dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "nox", "numpy", "orjson", "pandas", "pyarrow", "pytest", "pytest-asyncio", "pytest-cov", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "twine", "unasync"]
+docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"]
orjson = ["orjson (>=3)"]
+pyarrow = ["pyarrow (>=1)"]
requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"]
vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"]
@@ -4135,6 +4138,20 @@ files = [
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
]
+[[package]]
+name = "jsonlines"
+version = "4.0.0"
+description = "Library with helpers for the jsonlines file format"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55"},
+ {file = "jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74"},
+]
+
+[package.dependencies]
+attrs = ">=19.2.0"
+
[[package]]
name = "jsonpath-ng"
version = "1.6.1"
@@ -4469,6 +4486,24 @@ files = [
{file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"},
]
+[[package]]
+name = "loguru"
+version = "0.7.2"
+description = "Python logging made (stupidly) simple"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"},
+ {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"},
+]
+
+[package.dependencies]
+colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
+win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
+
+[package.extras]
+dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"]
+
[[package]]
name = "lxml"
version = "5.3.0"
@@ -5320,6 +5355,36 @@ plot = ["matplotlib"]
tgrep = ["pyparsing"]
twitter = ["twython"]
+[[package]]
+name = "nomic"
+version = "3.1.2"
+description = "The official Nomic python client."
+optional = false
+python-versions = "*"
+files = [
+ {file = "nomic-3.1.2.tar.gz", hash = "sha256:2de1ab1dcf2429011c92987bb2f1eafe1a3a4901c3185b18f994bf89616f606d"},
+]
+
+[package.dependencies]
+click = "*"
+jsonlines = "*"
+loguru = "*"
+numpy = "*"
+pandas = "*"
+pillow = "*"
+pyarrow = "*"
+pydantic = "*"
+pyjwt = "*"
+requests = "*"
+rich = "*"
+tqdm = "*"
+
+[package.extras]
+all = ["nomic[aws,local]"]
+aws = ["boto3", "sagemaker"]
+dev = ["black (==24.3.0)", "cairosvg", "coverage", "isort", "mkautodoc", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]", "myst-parser", "nomic[all]", "pandas", "pillow", "pylint", "pyright", "pytest", "pytorch-lightning", "twine"]
+local = ["gpt4all (>=2.5.0,<3)"]
+
[[package]]
name = "novita-client"
version = "0.5.7"
@@ -9919,6 +9984,20 @@ files = [
beautifulsoup4 = "*"
requests = ">=2.0.0,<3.0.0"
+[[package]]
+name = "win32-setctime"
+version = "1.1.0"
+description = "A small Python utility to set file creation time on Windows"
+optional = false
+python-versions = ">=3.5"
+files = [
+ {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"},
+ {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"},
+]
+
+[package.extras]
+dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
+
[[package]]
name = "wrapt"
version = "1.16.0"
@@ -10422,4 +10501,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
-content-hash = "eb7ef7be5c7790e214f37f17f92b69407ad557cb80055ef7e49e36eb51b3fca6"
+content-hash = "69b42bb1ff033f14e199fee8335356275099421d72bbd7037b7a991ea65cae08"
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 506f379aaf..f004865d5f 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -28,7 +28,6 @@ select = [
"PLR0402", # manual-from-import
"PLR1711", # useless-return
"PLR1714", # repeated-equality-comparison
- "PLR6201", # literal-membership
"RUF019", # unnecessary-key-check
"RUF100", # unused-noqa
"RUF101", # redirected-noqa
@@ -101,6 +100,7 @@ exclude = [
OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
FIREWORKS_API_KEY = "fw_aaaaaaaaaaaaaaaaaaaa"
+NOMIC_API_KEY = "nk-aaaaaaaaaaaaaaaaaaaa"
AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"
@@ -122,6 +122,7 @@ CODE_EXECUTION_API_KEY = "dify-sandbox"
FIRECRAWL_API_KEY = "fc-"
TEI_EMBEDDING_SERVER_URL = "http://a.abc.com:11451"
TEI_RERANK_SERVER_URL = "http://a.abc.com:11451"
+MIXEDBREAD_API_KEY = "mk-aaaaaaaaaaaaaaaaaaaa"
[tool.poetry]
name = "dify-api"
@@ -218,6 +219,7 @@ azure-ai-inference = "^1.0.0b3"
volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"}
oci = "^2.133.0"
tos = "^2.7.1"
+nomic = "^3.1.2"
[tool.poetry.group.indriect.dependencies]
kaleido = "0.2.1"
rank-bm25 = "~0.2.2"
@@ -251,7 +253,7 @@ alibabacloud_gpdb20160503 = "~3.8.0"
alibabacloud_tea_openapi = "~0.3.9"
chromadb = "0.5.1"
clickhouse-connect = "~0.7.16"
-elasticsearch = "8.14.0"
+elasticsearch = "~8.15.1"
oracledb = "~2.2.1"
pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] }
pgvector = "0.2.5"
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index 30c010ef29..e96f06ed40 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -1100,8 +1100,8 @@ class DocumentService:
DocumentService.data_source_args_validate(args)
DocumentService.process_rule_args_validate(args)
else:
- if ("data_source" not in args and not args["data_source"]) and (
- "process_rule" not in args and not args["process_rule"]
+ if ("data_source" not in args or not args["data_source"]) and (
+ "process_rule" not in args or not args["process_rule"]
):
raise ValueError("Data source or Process rule is required")
else:
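
The `and`-to-`or` change above is a genuine logic fix: with `and`, a payload missing the key short-circuits into `args["data_source"]` and raises `KeyError`, while a present-but-empty value never satisfies the first clause, so the guard could never fire correctly. A minimal sketch of the corrected predicate, using a made-up `args` payload rather than the real validator input:

```python
# Sketch of the dataset_service guard after the fix; `args` is a hypothetical
# request payload, not the actual validated arguments object.

def needs_source_or_rule(args: dict) -> bool:
    # Old check used `and`: a missing key still evaluated args["data_source"]
    # and raised KeyError, and a present-but-empty value was never caught.
    # New check uses `or`: it short-circuits safely on a missing key and also
    # catches the present-but-falsy case.
    data_source_missing = "data_source" not in args or not args["data_source"]
    process_rule_missing = "process_rule" not in args or not args["process_rule"]
    return data_source_missing and process_rule_missing

assert needs_source_or_rule({})                   # both keys absent
assert needs_source_or_rule({"data_source": {}})  # present but empty
assert not needs_source_or_rule(
    {"data_source": {"type": "upload_file"}, "process_rule": {"mode": "automatic"}}
)
```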
diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py
index 7ae1b9f231..2bc48c4185 100644
--- a/api/services/tools/tools_transform_service.py
+++ b/api/services/tools/tools_transform_service.py
@@ -74,12 +74,14 @@ class ToolTransformService:
en_US=provider_controller.identity.description.en_US,
zh_Hans=provider_controller.identity.description.zh_Hans,
pt_BR=provider_controller.identity.description.pt_BR,
+ ja_JP=provider_controller.identity.description.ja_JP,
),
icon=provider_controller.identity.icon,
label=I18nObject(
en_US=provider_controller.identity.label.en_US,
zh_Hans=provider_controller.identity.label.zh_Hans,
pt_BR=provider_controller.identity.label.pt_BR,
+ ja_JP=provider_controller.identity.label.ja_JP,
),
type=ToolProviderType.BUILT_IN,
masked_credentials={},
diff --git a/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py
new file mode 100644
index 0000000000..281e866e45
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/__mock/nomic_embeddings.py
@@ -0,0 +1,59 @@
+import os
+from collections.abc import Callable
+from typing import Any, Literal, Union
+
+import pytest
+
+# import monkeypatch
+from _pytest.monkeypatch import MonkeyPatch
+from nomic import embed
+
+
+def create_embedding(texts: list[str], model: str, **kwargs: Any) -> dict:
+ texts_len = len(texts)
+
+ foo_embedding_sample = 0.123456
+
+ combined = {
+ "embeddings": [[foo_embedding_sample for _ in range(768)] for _ in range(texts_len)],
+ "usage": {"prompt_tokens": texts_len, "total_tokens": texts_len},
+ "model": model,
+ "inference_mode": "remote",
+ }
+
+ return combined
+
+
+def mock_nomic(
+ monkeypatch: MonkeyPatch,
+ methods: list[Literal["text_embedding"]],
+) -> Callable[[], None]:
+ """
+ mock nomic module
+
+ :param monkeypatch: pytest monkeypatch fixture
+ :return: unpatch function
+ """
+
+ def unpatch() -> None:
+ monkeypatch.undo()
+
+ if "text_embedding" in methods:
+ monkeypatch.setattr(embed, "text", create_embedding)
+
+ return unpatch
+
+
+MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
+
+
+@pytest.fixture
+def setup_nomic_mock(request, monkeypatch):
+ methods = request.param if hasattr(request, "param") else []
+ if MOCK:
+ unpatch = mock_nomic(monkeypatch, methods=methods)
+
+ yield
+
+ if MOCK:
+ unpatch()
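
The fixture above only patches `nomic.embed` when `MOCK_SWITCH=true`; otherwise calls go to the live Nomic API. A hypothetical test (the model name mirrors the Nomic tests later in this diff) showing how the fixture is requested via indirect parametrization:

```python
# Hypothetical usage of setup_nomic_mock; only meaningful with MOCK_SWITCH=true,
# otherwise embed.text() hits the real Nomic endpoint.
import pytest
from nomic import embed

from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock


@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_embed_is_patched(setup_nomic_mock):
    result = embed.text(texts=["hello"], model="nomic-embed-text-v1.5")
    # The mock returns one 768-dim vector per input and counts one token per text.
    assert len(result["embeddings"]) == 1
    assert result["usage"]["total_tokens"] == 1
```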
diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py
new file mode 100644
index 0000000000..7bf723b3a9
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py
@@ -0,0 +1,54 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.fireworks.text_embedding.text_embedding import FireworksTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True)
+def test_validate_credentials(setup_openai_mock):
+ model = FireworksTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": "invalid_key"}
+ )
+
+ model.validate_credentials(
+ model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}
+ )
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True)
+def test_invoke_model(setup_openai_mock):
+ model = FireworksTextEmbeddingModel()
+
+ result = model.invoke(
+ model="nomic-ai/nomic-embed-text-v1.5",
+ credentials={
+ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"),
+ },
+ texts=["hello", "world", " ".join(["long_text"] * 100), " ".join(["another_long_text"] * 100)],
+ user="foo",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert len(result.embeddings) == 4
+ assert result.usage.total_tokens == 2
+
+
+def test_get_num_tokens():
+ model = FireworksTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="nomic-ai/nomic-embed-text-v1.5",
+ credentials={
+ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"),
+ },
+ texts=["hello", "world"],
+ )
+
+ assert num_tokens == 2
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/__init__.py b/api/tests/integration_tests/model_runtime/mixedbread/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/test_provider.py b/api/tests/integration_tests/model_runtime/mixedbread/test_provider.py
new file mode 100644
index 0000000000..25c9f3ce8d
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/mixedbread/test_provider.py
@@ -0,0 +1,28 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.mixedbread.mixedbread import MixedBreadProvider
+
+
+def test_validate_provider_credentials():
+ provider = MixedBreadProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(credentials={"api_key": "hahahaha"})
+ with patch("requests.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 3, "total_tokens": 3},
+ "model": "mixedbread-ai/mxbai-embed-large-v1",
+ "data": [{"embedding": [0.23333 for _ in range(1024)], "index": 0, "object": "embedding"}],
+ "object": "list",
+ "normalized": "true",
+ "encoding_format": "float",
+ "dimensions": 1024,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ provider.validate_provider_credentials(credentials={"api_key": os.environ.get("MIXEDBREAD_API_KEY")})
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/test_rerank.py b/api/tests/integration_tests/model_runtime/mixedbread/test_rerank.py
new file mode 100644
index 0000000000..b65aab74aa
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/mixedbread/test_rerank.py
@@ -0,0 +1,100 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from core.model_runtime.entities.rerank_entities import RerankResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.mixedbread.rerank.rerank import MixedBreadRerankModel
+
+
+def test_validate_credentials():
+ model = MixedBreadRerankModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="mxbai-rerank-large-v1",
+ credentials={"api_key": "invalid_key"},
+ )
+ with patch("httpx.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 86, "total_tokens": 86},
+ "model": "mixedbread-ai/mxbai-rerank-large-v1",
+ "data": [
+ {
+ "index": 0,
+ "score": 0.06762695,
+ "input": "Carson City is the capital city of the American state of Nevada. At the 2010 United "
+ "States Census, Carson City had a population of 55,274.",
+ "object": "text_document",
+ },
+ {
+ "index": 1,
+ "score": 0.057403564,
+ "input": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific "
+ "Ocean that are a political division controlled by the United States. Its capital is "
+ "Saipan.",
+ "object": "text_document",
+ },
+ ],
+ "object": "list",
+ "top_k": 2,
+ "return_input": True,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ model.validate_credentials(
+ model="mxbai-rerank-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ )
+
+
+def test_invoke_model():
+ model = MixedBreadRerankModel()
+ with patch("httpx.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 56, "total_tokens": 56},
+ "model": "mixedbread-ai/mxbai-rerank-large-v1",
+ "data": [
+ {
+ "index": 0,
+ "score": 0.6044922,
+ "input": "Kasumi is a girl name of Japanese origin meaning mist.",
+ "object": "text_document",
+ },
+ {
+ "index": 1,
+ "score": 0.0703125,
+ "input": "Her music is a kawaii bass, a mix of future bass, pop, and kawaii music and she leads a "
+ "team named PopiParty.",
+ "object": "text_document",
+ },
+ ],
+ "object": "list",
+ "top_k": 2,
+ "return_input": "true",
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ result = model.invoke(
+ model="mxbai-rerank-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ query="Who is Kasumi?",
+ docs=[
+ "Kasumi is a girl name of Japanese origin meaning mist.",
+ "Her music is a kawaii bass, a mix of future bass, pop, and kawaii music and she leads a team named "
+ "PopiParty.",
+ ],
+ score_threshold=0.5,
+ )
+
+ assert isinstance(result, RerankResult)
+ assert len(result.docs) == 1
+ assert result.docs[0].index == 0
+ assert result.docs[0].score >= 0.5
diff --git a/api/tests/integration_tests/model_runtime/mixedbread/test_text_embedding.py b/api/tests/integration_tests/model_runtime/mixedbread/test_text_embedding.py
new file mode 100644
index 0000000000..ca97a18951
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/mixedbread/test_text_embedding.py
@@ -0,0 +1,78 @@
+import os
+from unittest.mock import Mock, patch
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.mixedbread.text_embedding.text_embedding import MixedBreadTextEmbeddingModel
+
+
+def test_validate_credentials():
+ model = MixedBreadTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(model="mxbai-embed-large-v1", credentials={"api_key": "invalid_key"})
+ with patch("requests.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 3, "total_tokens": 3},
+ "model": "mixedbread-ai/mxbai-embed-large-v1",
+ "data": [{"embedding": [0.23333 for _ in range(1024)], "index": 0, "object": "embedding"}],
+ "object": "list",
+ "normalized": "true",
+ "encoding_format": "float",
+ "dimensions": 1024,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ model.validate_credentials(
+ model="mxbai-embed-large-v1", credentials={"api_key": os.environ.get("MIXEDBREAD_API_KEY")}
+ )
+
+
+def test_invoke_model():
+ model = MixedBreadTextEmbeddingModel()
+
+ with patch("requests.post") as mock_post:
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ "usage": {"prompt_tokens": 6, "total_tokens": 6},
+ "model": "mixedbread-ai/mxbai-embed-large-v1",
+ "data": [
+ {"embedding": [0.23333 for _ in range(1024)], "index": 0, "object": "embedding"},
+ {"embedding": [0.23333 for _ in range(1024)], "index": 1, "object": "embedding"},
+ ],
+ "object": "list",
+ "normalized": "true",
+ "encoding_format": "float",
+ "dimensions": 1024,
+ }
+ mock_response.status_code = 200
+ mock_post.return_value = mock_response
+ result = model.invoke(
+ model="mxbai-embed-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ texts=["hello", "world"],
+ user="abc-123",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert len(result.embeddings) == 2
+ assert result.usage.total_tokens == 6
+
+
+def test_get_num_tokens():
+ model = MixedBreadTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="mxbai-embed-large-v1",
+ credentials={
+ "api_key": os.environ.get("MIXEDBREAD_API_KEY"),
+ },
+ texts=["ping"],
+ )
+
+ assert num_tokens == 1
diff --git a/api/tests/integration_tests/model_runtime/nomic/__init__.py b/api/tests/integration_tests/model_runtime/nomic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py
new file mode 100644
index 0000000000..52dc96ee95
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/nomic/test_embeddings.py
@@ -0,0 +1,62 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_validate_credentials(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": "invalid_key",
+ },
+ )
+
+ model.validate_credentials(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ )
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_invoke_model(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ result = model.invoke(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ texts=["hello", "world"],
+ user="foo",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert result.model == "nomic-embed-text-v1.5"
+ assert len(result.embeddings) == 2
+ assert result.usage.total_tokens == 2
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_get_num_tokens(setup_nomic_mock):
+ model = NomicTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="nomic-embed-text-v1.5",
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ texts=["hello", "world"],
+ )
+
+ assert num_tokens == 2
diff --git a/api/tests/integration_tests/model_runtime/nomic/test_provider.py b/api/tests/integration_tests/model_runtime/nomic/test_provider.py
new file mode 100644
index 0000000000..6cad400c06
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/nomic/test_provider.py
@@ -0,0 +1,22 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.nomic.nomic import NomicAtlasProvider
+from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
+
+
+@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
+def test_validate_provider_credentials(setup_nomic_mock):
+ provider = NomicAtlasProvider()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ provider.validate_provider_credentials(credentials={})
+
+ provider.validate_provider_credentials(
+ credentials={
+ "nomic_api_key": os.environ.get("NOMIC_API_KEY"),
+ },
+ )
diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh
index 4c1c6bf4f3..b60ff64fdc 100755
--- a/dev/pytest/pytest_model_runtime.sh
+++ b/dev/pytest/pytest_model_runtime.sh
@@ -7,4 +7,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
api/tests/integration_tests/model_runtime/upstage \
- api/tests/integration_tests/model_runtime/fireworks
+ api/tests/integration_tests/model_runtime/fireworks \
+ api/tests/integration_tests/model_runtime/nomic \
+ api/tests/integration_tests/model_runtime/mixedbread
diff --git a/docker/.env.example b/docker/.env.example
index c892c15636..d43c3edc7e 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -346,7 +346,7 @@ VOLCENGINE_TOS_REGION=your-region
# ------------------------------
# The type of vector store to use.
-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`.
+# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`.
VECTOR_STORE=weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -385,13 +385,30 @@ MYSCALE_PASSWORD=
MYSCALE_DATABASE=dify
MYSCALE_FTS_PARAMS=
-# pgvector configurations, only available when VECTOR_STORE is `pgvecto-rs or pgvector`
+# pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=difyai123456
PGVECTOR_DATABASE=dify
+# pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs`
+PGVECTO_RS_HOST=pgvecto-rs
+PGVECTO_RS_PORT=5432
+PGVECTO_RS_USER=postgres
+PGVECTO_RS_PASSWORD=difyai123456
+PGVECTO_RS_DATABASE=dify
+
+# analyticdb configurations, only available when VECTOR_STORE is `analyticdb`
+ANALYTICDB_KEY_ID=your-ak
+ANALYTICDB_KEY_SECRET=your-sk
+ANALYTICDB_REGION_ID=cn-hangzhou
+ANALYTICDB_INSTANCE_ID=gp-ab123456
+ANALYTICDB_ACCOUNT=testaccount
+ANALYTICDB_PASSWORD=testpassword
+ANALYTICDB_NAMESPACE=dify
+ANALYTICDB_NAMESPACE_PASSWORD=difypassword
+
# TiDB vector configurations, only available when VECTOR_STORE is `tidb`
TIDB_VECTOR_HOST=tidb
TIDB_VECTOR_PORT=4000
@@ -563,6 +580,15 @@ CODE_MAX_STRING_ARRAY_LENGTH=30
CODE_MAX_OBJECT_ARRAY_LENGTH=30
CODE_MAX_NUMBER_ARRAY_LENGTH=1000
+# Workflow runtime configuration
+WORKFLOW_MAX_EXECUTION_STEPS=500
+WORKFLOW_MAX_EXECUTION_TIME=1200
+WORKFLOW_CALL_MAX_DEPTH=5
+
+# HTTP request node in workflow configuration
+HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
+HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
+
# SSRF Proxy server HTTP URL
SSRF_PROXY_HTTP_URL=http://ssrf_proxy:3128
# SSRF Proxy server HTTPS URL
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index e72c3724f9..95e271a0e9 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -202,8 +202,13 @@ x-shared-env: &shared-api-worker-env
CODE_MAX_STRING_ARRAY_LENGTH: ${CODE_MAX_STRING_ARRAY_LENGTH:-30}
CODE_MAX_OBJECT_ARRAY_LENGTH: ${CODE_MAX_OBJECT_ARRAY_LENGTH:-30}
CODE_MAX_NUMBER_ARRAY_LENGTH: ${CODE_MAX_NUMBER_ARRAY_LENGTH:-1000}
+ WORKFLOW_MAX_EXECUTION_STEPS: ${WORKFLOW_MAX_EXECUTION_STEPS:-500}
+ WORKFLOW_MAX_EXECUTION_TIME: ${WORKFLOW_MAX_EXECUTION_TIME:-1200}
+  WORKFLOW_CALL_MAX_DEPTH: ${WORKFLOW_CALL_MAX_DEPTH:-5}
SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128}
SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128}
+ HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760}
+ HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576}
services:
# API service
@@ -625,7 +630,7 @@ services:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html
# https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-prod-prerequisites
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:8.14.3
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.15.1
container_name: elasticsearch
profiles:
- elasticsearch
@@ -652,7 +657,7 @@ services:
# https://www.elastic.co/guide/en/kibana/current/docker.html
# https://www.elastic.co/guide/en/kibana/current/settings.html
kibana:
- image: docker.elastic.co/kibana/kibana:8.14.3
+ image: docker.elastic.co/kibana/kibana:8.15.1
container_name: kibana
profiles:
- elasticsearch
diff --git a/sdks/python-client/dify_client/client.py b/sdks/python-client/dify_client/client.py
index 2be079bdf3..5e42507a42 100644
--- a/sdks/python-client/dify_client/client.py
+++ b/sdks/python-client/dify_client/client.py
@@ -1,103 +1,80 @@
import json
+
import requests
class DifyClient:
- def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1'):
+ def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1"):
self.api_key = api_key
self.base_url = base_url
def _send_request(self, method, endpoint, json=None, params=None, stream=False):
- headers = {
- "Authorization": f"Bearer {self.api_key}",
- "Content-Type": "application/json"
- }
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
url = f"{self.base_url}{endpoint}"
response = requests.request(method, url, json=json, params=params, headers=headers, stream=stream)
return response
-
def _send_request_with_files(self, method, endpoint, data, files):
- headers = {
- "Authorization": f"Bearer {self.api_key}"
- }
+ headers = {"Authorization": f"Bearer {self.api_key}"}
url = f"{self.base_url}{endpoint}"
response = requests.request(method, url, data=data, headers=headers, files=files)
return response
-
+
def message_feedback(self, message_id, rating, user):
- data = {
- "rating": rating,
- "user": user
- }
+ data = {"rating": rating, "user": user}
return self._send_request("POST", f"/messages/{message_id}/feedbacks", data)
-
+
def get_application_parameters(self, user):
params = {"user": user}
return self._send_request("GET", "/parameters", params=params)
-
+
def file_upload(self, user, files):
- data = {
- "user": user
- }
+ data = {"user": user}
return self._send_request_with_files("POST", "/files/upload", data=data, files=files)
- def text_to_audio(self, text:str, user:str, streaming:bool=False):
- data = {
- "text": text,
- "user": user,
- "streaming": streaming
- }
+ def text_to_audio(self, text: str, user: str, streaming: bool = False):
+ data = {"text": text, "user": user, "streaming": streaming}
return self._send_request("POST", "/text-to-audio", data=data)
-
- def get_meta(self,user):
- params = { "user": user}
- return self._send_request("GET", f"/meta", params=params)
+
+ def get_meta(self, user):
+ params = {"user": user}
+ return self._send_request("GET", "/meta", params=params)
class CompletionClient(DifyClient):
def create_completion_message(self, inputs, response_mode, user, files=None):
- data = {
- "inputs": inputs,
- "response_mode": response_mode,
- "user": user,
- "files": files
- }
- return self._send_request("POST", "/completion-messages", data,
- stream=True if response_mode == "streaming" else False)
+ data = {"inputs": inputs, "response_mode": response_mode, "user": user, "files": files}
+ return self._send_request(
+ "POST", "/completion-messages", data, stream=True if response_mode == "streaming" else False
+ )
class ChatClient(DifyClient):
def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None, files=None):
- data = {
- "inputs": inputs,
- "query": query,
- "user": user,
- "response_mode": response_mode,
- "files": files
- }
+ data = {"inputs": inputs, "query": query, "user": user, "response_mode": response_mode, "files": files}
if conversation_id:
data["conversation_id"] = conversation_id
- return self._send_request("POST", "/chat-messages", data,
- stream=True if response_mode == "streaming" else False)
-
- def get_suggested(self, message_id, user:str):
+ return self._send_request(
+ "POST", "/chat-messages", data, stream=True if response_mode == "streaming" else False
+ )
+
+ def get_suggested(self, message_id, user: str):
params = {"user": user}
return self._send_request("GET", f"/messages/{message_id}/suggested", params=params)
-
+
def stop_message(self, task_id, user):
data = {"user": user}
- return self._send_request("POST", f"/chat-messages/{task_id}/stop", data)
+ return self._send_request("POST", f"/chat-messages/{task_id}/stop", data)
def get_conversations(self, user, last_id=None, limit=None, pinned=None):
params = {"user": user, "last_id": last_id, "limit": limit, "pinned": pinned}
return self._send_request("GET", "/conversations", params=params)
-
+
def get_conversation_messages(self, user, conversation_id=None, first_id=None, limit=None):
params = {"user": user}
@@ -109,15 +86,15 @@ class ChatClient(DifyClient):
params["limit"] = limit
return self._send_request("GET", "/messages", params=params)
-
- def rename_conversation(self, conversation_id, name,auto_generate:bool, user:str):
- data = {"name": name, "auto_generate": auto_generate,"user": user}
+
+ def rename_conversation(self, conversation_id, name, auto_generate: bool, user: str):
+ data = {"name": name, "auto_generate": auto_generate, "user": user}
return self._send_request("POST", f"/conversations/{conversation_id}/name", data)
def delete_conversation(self, conversation_id, user):
data = {"user": user}
return self._send_request("DELETE", f"/conversations/{conversation_id}", data)
-
+
def audio_to_text(self, audio_file, user):
data = {"user": user}
files = {"audio_file": audio_file}
@@ -125,10 +102,10 @@ class ChatClient(DifyClient):
class WorkflowClient(DifyClient):
- def run(self, inputs:dict, response_mode:str="streaming", user:str="abc-123"):
+ def run(self, inputs: dict, response_mode: str = "streaming", user: str = "abc-123"):
data = {"inputs": inputs, "response_mode": response_mode, "user": user}
return self._send_request("POST", "/workflows/run", data)
-
+
def stop(self, task_id, user):
data = {"user": user}
return self._send_request("POST", f"/workflows/tasks/{task_id}/stop", data)
@@ -137,10 +114,8 @@ class WorkflowClient(DifyClient):
return self._send_request("GET", f"/workflows/run/{workflow_run_id}")
-
class KnowledgeBaseClient(DifyClient):
-
- def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1', dataset_id: str = None):
+ def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1", dataset_id: str = None):
"""
Construct a KnowledgeBaseClient object.
@@ -150,10 +125,7 @@ class KnowledgeBaseClient(DifyClient):
dataset_id (str, optional): ID of the dataset. Defaults to None. You don't need this if you just want to
create a new dataset. or list datasets. otherwise you need to set this.
"""
- super().__init__(
- api_key=api_key,
- base_url=base_url
- )
+ super().__init__(api_key=api_key, base_url=base_url)
self.dataset_id = dataset_id
def _get_dataset_id(self):
@@ -162,10 +134,10 @@ class KnowledgeBaseClient(DifyClient):
return self.dataset_id
def create_dataset(self, name: str, **kwargs):
- return self._send_request('POST', '/datasets', {'name': name}, **kwargs)
+ return self._send_request("POST", "/datasets", {"name": name}, **kwargs)
def list_datasets(self, page: int = 1, page_size: int = 20, **kwargs):
- return self._send_request('GET', f'/datasets?page={page}&limit={page_size}', **kwargs)
+ return self._send_request("GET", f"/datasets?page={page}&limit={page_size}", **kwargs)
def create_document_by_text(self, name, text, extra_params: dict = None, **kwargs):
"""
@@ -193,14 +165,7 @@ class KnowledgeBaseClient(DifyClient):
}
:return: Response from the API
"""
- data = {
- 'indexing_technique': 'high_quality',
- 'process_rule': {
- 'mode': 'automatic'
- },
- 'name': name,
- 'text': text
- }
+ data = {"indexing_technique": "high_quality", "process_rule": {"mode": "automatic"}, "name": name, "text": text}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
url = f"/datasets/{self._get_dataset_id()}/document/create_by_text"
@@ -233,10 +198,7 @@ class KnowledgeBaseClient(DifyClient):
}
:return: Response from the API
"""
- data = {
- 'name': name,
- 'text': text
- }
+ data = {"name": name, "text": text}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/update_by_text"
@@ -269,16 +231,11 @@ class KnowledgeBaseClient(DifyClient):
:return: Response from the API
"""
files = {"file": open(file_path, "rb")}
- data = {
- 'process_rule': {
- 'mode': 'automatic'
- },
- 'indexing_technique': 'high_quality'
- }
+ data = {"process_rule": {"mode": "automatic"}, "indexing_technique": "high_quality"}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
if original_document_id is not None:
- data['original_document_id'] = original_document_id
+ data["original_document_id"] = original_document_id
url = f"/datasets/{self._get_dataset_id()}/document/create_by_file"
return self._send_request_with_files("POST", url, {"data": json.dumps(data)}, files)
@@ -352,11 +309,11 @@ class KnowledgeBaseClient(DifyClient):
"""
params = {}
if page is not None:
- params['page'] = page
+ params["page"] = page
if page_size is not None:
- params['limit'] = page_size
+ params["limit"] = page_size
if keyword is not None:
- params['keyword'] = keyword
+ params["keyword"] = keyword
url = f"/datasets/{self._get_dataset_id()}/documents"
return self._send_request("GET", url, params=params, **kwargs)
@@ -383,9 +340,9 @@ class KnowledgeBaseClient(DifyClient):
url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments"
params = {}
if keyword is not None:
- params['keyword'] = keyword
+ params["keyword"] = keyword
if status is not None:
- params['status'] = status
+ params["status"] = status
if "params" in kwargs:
params.update(kwargs["params"])
return self._send_request("GET", url, params=params, **kwargs)
diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx
index 90874f50ce..0f18544335 100644
--- a/web/app/activate/page.tsx
+++ b/web/app/activate/page.tsx
@@ -22,7 +22,7 @@ const Activate = () => {
{children}
)
- }, [chartData, children, className, inline, isSVG, language, languageShowName, match, props])
+ }
+ else if (language === 'svg' && isSVG) {
+ return (
+ {children}
+
+ return (
+