diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml
new file mode 100644
index 0000000000..5aee64b8e6
--- /dev/null
+++ b/.github/workflows/web-tests.yml
@@ -0,0 +1,46 @@
+name: Web Tests
+
+on:
+ pull_request:
+ branches:
+ - main
+ paths:
+ - web/**
+
+concurrency:
+ group: web-tests-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ test:
+ name: Web Tests
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ working-directory: ./web
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Check changed files
+ id: changed-files
+ uses: tj-actions/changed-files@v45
+ with:
+ files: web/**
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ if: steps.changed-files.outputs.any_changed == 'true'
+ with:
+ node-version: 20
+ cache: yarn
+        cache-dependency-path: ./web/yarn.lock
+
+ - name: Install dependencies
+ if: steps.changed-files.outputs.any_changed == 'true'
+ run: yarn install --frozen-lockfile
+
+ - name: Run tests
+ if: steps.changed-files.outputs.any_changed == 'true'
+ run: yarn test
diff --git a/api/app.py b/api/app.py
index 91a49337fc..1b58beee15 100644
--- a/api/app.py
+++ b/api/app.py
@@ -53,11 +53,9 @@ from services.account_service import AccountService
warnings.simplefilter("ignore", ResourceWarning)
-# fix windows platform
-if os.name == "nt":
- os.system('tzutil /s "UTC"')
-else:
- os.environ["TZ"] = "UTC"
+os.environ["TZ"] = "UTC"
+# Windows platform does not support time.tzset()
+if hasattr(time, "tzset"):
time.tzset()
diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
index 1b412b8639..203aca3384 100644
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -309,7 +309,7 @@ class AppRunner:
if not prompt_messages:
prompt_messages = result.prompt_messages
- if not usage and result.delta.usage:
+ if result.delta.usage:
usage = result.delta.usage
if not usage:
diff --git a/api/core/embedding/cached_embedding.py b/api/core/embedding/cached_embedding.py
index 8ce12fd59f..75219051cd 100644
--- a/api/core/embedding/cached_embedding.py
+++ b/api/core/embedding/cached_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional, cast
import numpy as np
from sqlalchemy.exc import IntegrityError
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@@ -56,7 +57,9 @@ class CacheEmbedding(Embeddings):
for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks]
- embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+ embedding_result = self._model_instance.invoke_text_embedding(
+ texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+ )
for vector in embedding_result.embeddings:
try:
@@ -100,7 +103,9 @@ class CacheEmbedding(Embeddings):
redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try:
- embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+ embedding_result = self._model_instance.invoke_text_embedding(
+ texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+ )
embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
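Note how the two call sites above now encode intent: bulk document embedding (persisted to the database cache) passes `EmbeddingInputType.DOCUMENT`, while one-off query embedding (cached in Redis with a 10-minute TTL) passes `EmbeddingInputType.QUERY`. A minimal sketch of the query-side pattern, assuming `model_instance`, `redis_client`, and the cache key are supplied by the caller:

```python
import base64

import numpy as np

from core.embedding.embedding_constant import EmbeddingInputType


def embed_query_cached(model_instance, redis_client, cache_key: str, text: str):
    # Serve from Redis if present, refreshing the 10-minute TTL.
    cached = redis_client.get(cache_key)
    if cached is not None:
        redis_client.expire(cache_key, 600)
        return list(np.frombuffer(base64.b64decode(cached), dtype="float"))

    # Otherwise embed as a QUERY, L2-normalize, and cache the raw bytes.
    result = model_instance.invoke_text_embedding(
        texts=[text], user=None, input_type=EmbeddingInputType.QUERY
    )
    vector = np.array(result.embeddings[0])
    vector = vector / np.linalg.norm(vector)
    redis_client.setex(cache_key, 600, base64.b64encode(vector.tobytes()))
    return vector.tolist()
```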
diff --git a/api/core/embedding/embedding_constant.py b/api/core/embedding/embedding_constant.py
new file mode 100644
index 0000000000..9b4934646b
--- /dev/null
+++ b/api/core/embedding/embedding_constant.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class EmbeddingInputType(Enum):
+ """
+ Enum for embedding input type.
+ """
+
+ DOCUMENT = "document"
+ QUERY = "query"
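Since the members carry string values, providers that send the type over the wire can use `.value` directly, and a raw string maps back through the enum constructor. A small sketch:

```python
from core.embedding.embedding_constant import EmbeddingInputType

assert EmbeddingInputType.DOCUMENT.value == "document"
assert EmbeddingInputType.QUERY.value == "query"

# Lookup by value also works, e.g. when the type arrives as a plain string:
input_type = EmbeddingInputType("query")
assert input_type is EmbeddingInputType.QUERY
```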
diff --git a/api/core/model_manager.py b/api/core/model_manager.py
index 990efd36c6..74b4452362 100644
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -3,6 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast
+from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
@@ -158,12 +159,15 @@ class ModelInstance:
tools=tools,
)
- def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+ def invoke_text_embedding(
+ self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+ ) -> TextEmbeddingResult:
"""
    Invoke text embedding model
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
if not isinstance(self.model_type_instance, TextEmbeddingModel):
@@ -176,6 +180,7 @@ class ModelInstance:
credentials=self.credentials,
texts=texts,
user=user,
+ input_type=input_type,
)
def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
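Because `input_type` defaults to `EmbeddingInputType.DOCUMENT`, call sites that don't pass it keep their old behavior. A hedged usage sketch, assuming `model_instance` is an already-configured `ModelInstance` for a text-embedding model:

```python
from core.embedding.embedding_constant import EmbeddingInputType

# Index-time: embed documents (the default, shown explicitly here).
doc_result = model_instance.invoke_text_embedding(
    texts=["chunk one", "chunk two"],
    user="user-123",
    input_type=EmbeddingInputType.DOCUMENT,
)

# Search-time: embed the query with the query-optimized setting.
query_result = model_instance.invoke_text_embedding(
    texts=["what is dify?"],
    user="user-123",
    input_type=EmbeddingInputType.QUERY,
)
print(len(doc_result.embeddings), len(query_result.embeddings))
```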
diff --git a/api/core/model_runtime/model_providers/__base/text_embedding_model.py b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
index 54a4486023..a948dca20d 100644
--- a/api/core/model_runtime/model_providers/__base/text_embedding_model.py
+++ b/api/core/model_runtime/model_providers/__base/text_embedding_model.py
@@ -4,6 +4,7 @@ from typing import Optional
from pydantic import ConfigDict
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel
@@ -20,35 +21,47 @@ class TextEmbeddingModel(AIModel):
model_config = ConfigDict(protected_namespaces=())
def invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
- Invoke large language model
+ Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
self.started_at = time.perf_counter()
try:
- return self._invoke(model, credentials, texts, user)
+ return self._invoke(model, credentials, texts, user, input_type)
except Exception as e:
raise self._transform_invoke_error(e)
@abstractmethod
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
- Invoke large language model
+ Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
raise NotImplementedError
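Every provider implementation must now accept the extra keyword, even if it ignores it; the rest of this patch updates each `_invoke` accordingly. A minimal sketch of a conforming subclass (the provider name, placeholder vectors, and zeroed usage are illustrative only; other abstract members of the provider base are omitted):

```python
from decimal import Decimal
from typing import Optional

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel


class ExampleTextEmbeddingModel(TextEmbeddingModel):
    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        # Providers without a document/query distinction may simply ignore
        # input_type; others forward it to their API (see the Jina diff below).
        vectors = [[0.0] * 8 for _ in texts]  # placeholder embeddings
        usage = EmbeddingUsage(
            tokens=0, total_tokens=0,
            unit_price=Decimal(0), price_unit=Decimal(0),
            total_price=Decimal(0), currency="USD", latency=0.0,
        )
        return TextEmbeddingResult(model=model, embeddings=vectors, usage=usage)

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        # crude whitespace count; real providers use a tokenizer
        return sum(len(text.split()) for text in texts)
```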
diff --git a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
index d9cff8ecbb..8701a38050 100644
--- a/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/azure_openai/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
import tiktoken
from openai import AzureOpenAI
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -17,8 +18,23 @@ from core.model_runtime.model_providers.azure_openai._constant import EMBEDDING_
class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+    :param input_type: input type of the texts (document or query)
+ :return: embeddings result
+ """
base_model_name = credentials["base_model_name"]
credentials_kwargs = self._to_credential_kwargs(credentials)
client = AzureOpenAI(**credentials_kwargs)
diff --git a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
index 779dfbb608..56b9be1c36 100644
--- a/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/baichuan/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -35,7 +36,12 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "http://api.baichuan-ai.com/v1/embeddings"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -44,6 +50,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
index 251170d1ae..d9c5726592 100644
--- a/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/bedrock/text_embedding/text_embedding.py
@@ -13,6 +13,7 @@ from botocore.exceptions import (
UnknownServiceError,
)
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -30,7 +31,12 @@ logger = logging.getLogger(__name__)
class BedrockTextEmbeddingModel(TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -39,6 +45,7 @@ class BedrockTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
client_config = Config(region_name=credentials["aws_region"])
diff --git a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
index a1c5e98118..4da2080690 100644
--- a/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ import cohere
import numpy as np
from cohere.core import RequestOptions
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -25,7 +26,12 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -34,6 +40,7 @@ class CohereTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
index f886fa23b5..cdb87a55e9 100644
--- a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
+++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml
@@ -15,6 +15,7 @@ help:
en_US: https://fireworks.ai/account/api-keys
supported_model_types:
- llm
+ - text-embedding
configurate_methods:
- predefined-model
provider_credential_schema:
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
new file mode 100644
index 0000000000..31415a24fa
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-11b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 11B Vision Instruct
+ en_US: Llama 3.2 11B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.2'
+ output: '0.2'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
new file mode 100644
index 0000000000..c2fd77d256
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-1b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-1b-instruct
+label:
+ zh_Hans: Llama 3.2 1B Instruct
+ en_US: Llama 3.2 1B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
new file mode 100644
index 0000000000..4b3c459c7b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-3b-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-3b-instruct
+label:
+ zh_Hans: Llama 3.2 3B Instruct
+ en_US: Llama 3.2 3B Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.1'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
new file mode 100644
index 0000000000..0aece7455d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p2-90b-vision-instruct.yaml
@@ -0,0 +1,46 @@
+model: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+label:
+ zh_Hans: Llama 3.2 90B Vision Instruct
+ en_US: Llama 3.2 90B Vision Instruct
+model_type: llm
+features:
+ - agent-thought
+ - tool-call
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ - name: max_tokens
+ use_template: max_tokens
+ - name: context_length_exceeded_behavior
+ default: None
+ label:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ help:
+ zh_Hans: 上下文长度超出行为
+ en_US: Context Length Exceeded Behavior
+ type: string
+ options:
+ - None
+ - truncate
+ - error
+ - name: response_format
+ use_template: response_format
+pricing:
+ input: '0.9'
+ output: '0.9'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
new file mode 100644
index 0000000000..d7c11691cf
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/UAE-Large-V1.yaml
@@ -0,0 +1,12 @@
+model: WhereIsAI/UAE-Large-V1
+label:
+ zh_Hans: UAE-Large-V1
+ en_US: UAE-Large-V1
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
new file mode 100644
index 0000000000..d09bafb4d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-base.yaml
@@ -0,0 +1,12 @@
+model: thenlper/gte-base
+label:
+ zh_Hans: GTE-base
+ en_US: GTE-base
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
new file mode 100644
index 0000000000..c41fa2f9d3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/gte-large.yaml
@@ -0,0 +1,12 @@
+model: thenlper/gte-large
+label:
+ zh_Hans: GTE-large
+ en_US: GTE-large
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
new file mode 100644
index 0000000000..c9098503d9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.5.yaml
@@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1.5
+label:
+ zh_Hans: nomic-embed-text-v1.5
+ en_US: nomic-embed-text-v1.5
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+ max_chunks: 16
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
new file mode 100644
index 0000000000..89078d3ff6
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/nomic-embed-text-v1.yaml
@@ -0,0 +1,12 @@
+model: nomic-ai/nomic-embed-text-v1
+label:
+ zh_Hans: nomic-embed-text-v1
+ en_US: nomic-embed-text-v1
+model_type: text-embedding
+model_properties:
+ context_size: 8192
+ max_chunks: 16
+pricing:
+ input: '0.008'
+ unit: '0.000001'
+ currency: 'USD'
diff --git a/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
new file mode 100644
index 0000000000..cdce69ff38
--- /dev/null
+++ b/api/core/model_runtime/model_providers/fireworks/text_embedding/text_embedding.py
@@ -0,0 +1,151 @@
+import time
+from collections.abc import Mapping
+from typing import Optional, Union
+
+import numpy as np
+from openai import OpenAI
+
+from core.embedding.embedding_constant import EmbeddingInputType
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.fireworks._common import _CommonFireworks
+
+
+class FireworksTextEmbeddingModel(_CommonFireworks, TextEmbeddingModel):
+ """
+ Model class for Fireworks text embedding model.
+ """
+
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+    :param input_type: input type of the texts (document or query)
+ :return: embeddings result
+ """
+
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ extra_model_kwargs = {}
+ if user:
+ extra_model_kwargs["user"] = user
+
+ extra_model_kwargs["encoding_format"] = "float"
+
+ context_size = self._get_context_size(model, credentials)
+ max_chunks = self._get_max_chunks(model, credentials)
+
+ inputs = []
+ indices = []
+ used_tokens = 0
+
+ for i, text in enumerate(texts):
+ # Here token count is only an approximation based on the GPT2 tokenizer
+ # TODO: Optimize for better token estimation and chunking
+ num_tokens = self._get_num_tokens_by_gpt2(text)
+
+ if num_tokens >= context_size:
+ cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
+ # if num tokens is larger than context length, only use the start
+ inputs.append(text[0:cutoff])
+ else:
+ inputs.append(text)
+ indices += [i]
+
+ batched_embeddings = []
+ _iter = range(0, len(inputs), max_chunks)
+
+ for i in _iter:
+ embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+ model=model,
+ client=client,
+ texts=inputs[i : i + max_chunks],
+ extra_model_kwargs=extra_model_kwargs,
+ )
+ used_tokens += embedding_used_tokens
+ batched_embeddings += embeddings_batch
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=used_tokens)
+ return TextEmbeddingResult(embeddings=batched_embeddings, usage=usage, model=model)
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+        Get the number of tokens for the given texts
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ return sum(self._get_num_tokens_by_gpt2(text) for text in texts)
+
+ def validate_credentials(self, model: str, credentials: Mapping) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ # transform credentials to kwargs for model instance
+ credentials_kwargs = self._to_credential_kwargs(credentials)
+ client = OpenAI(**credentials_kwargs)
+
+ # call embedding model
+ self._embedding_invoke(model=model, client=client, texts=["ping"], extra_model_kwargs={})
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ def _embedding_invoke(
+ self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict
+ ) -> tuple[list[list[float]], int]:
+ """
+ Invoke embedding model
+ :param model: model name
+ :param client: model client
+ :param texts: texts to embed
+ :param extra_model_kwargs: extra model kwargs
+ :return: embeddings and used tokens
+ """
+ response = client.embeddings.create(model=model, input=texts, **extra_model_kwargs)
+ return [data.embedding for data in response.data], response.usage.total_tokens
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ input_price_info = self.get_price(
+ model=model, credentials=credentials, tokens=tokens, price_type=PriceType.INPUT
+ )
+
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at,
+ )
+
+ return usage
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
new file mode 100644
index 0000000000..d84e9937e0
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-001
+label:
+ en_US: Gemini 1.5 Flash 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..2ff70564b2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
index bbc697e934..4e0209890a 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
new file mode 100644
index 0000000000..2aea8149f4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash-8b-exp-0924
+label:
+ en_US: Gemini 1.5 Flash 8B 0924
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
index c5695e5dda..faabc5e4d1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
index 24b1c5af8a..a22fcca941 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-latest
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
new file mode 100644
index 0000000000..dfd55c3a94
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-flash
+label:
+ en_US: Gemini 1.5 Flash
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
new file mode 100644
index 0000000000..a1feff171d
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-001
+label:
+ en_US: Gemini 1.5 Pro 001
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..9ae07a06c5
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
index 0a918e0d7b..97c68f7a18 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
index 7452ce46e7..860e4816a1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
index b3e1ecf3af..d1bf7d269d 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-latest
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro Latest
model_type: llm
features:
- agent-thought
@@ -32,6 +32,15 @@ parameter_rules:
max: 8192
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
new file mode 100644
index 0000000000..bdd70b34a2
--- /dev/null
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@@ -0,0 +1,48 @@
+model: gemini-1.5-pro
+label:
+ en_US: Gemini 1.5 Pro
+model_type: llm
+features:
+ - agent-thought
+ - vision
+ - tool-call
+ - stream-tool-call
+model_properties:
+ mode: chat
+ context_size: 2097152
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ zh_Hans: 取样数量
+ en_US: Top k
+ type: int
+ help:
+ zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: max_tokens_to_sample
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+ - name: response_format
+ use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
index 075e484e46..2d213d56ad 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@@ -27,6 +27,15 @@ parameter_rules:
default: 4096
min: 1
max: 4096
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
index 4e9f59e7da..e2f487c1ee 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@@ -31,6 +31,15 @@ parameter_rules:
max: 2048
- name: response_format
use_template: response_format
+ - name: stream
+ label:
+ zh_Hans: 流式输出
+ en_US: Stream
+ type: boolean
+ help:
+ zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果,而不是一次性生成全部结果后再返回。
+ en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
+ default: false
pricing:
input: '0.00'
output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/llm.py b/api/core/model_runtime/model_providers/google/llm/llm.py
index 3fc6787a44..e686ad08d9 100644
--- a/api/core/model_runtime/model_providers/google/llm/llm.py
+++ b/api/core/model_runtime/model_providers/google/llm/llm.py
@@ -9,8 +9,8 @@ import google.ai.generativelanguage as glm
import google.generativeai as genai
import requests
from google.api_core import exceptions
-from google.generativeai import client
-from google.generativeai.types import ContentType, GenerateContentResponse, HarmBlockThreshold, HarmCategory
+from google.generativeai.client import _ClientManager
+from google.generativeai.types import ContentType, GenerateContentResponse
from google.generativeai.types.content_types import to_part
from PIL import Image
@@ -200,24 +200,16 @@ class GoogleLargeLanguageModel(LargeLanguageModel):
history.append(content)
# Create a new ClientManager with tenant's API key
- new_client_manager = client._ClientManager()
+ new_client_manager = _ClientManager()
new_client_manager.configure(api_key=credentials["google_api_key"])
new_custom_client = new_client_manager.make_client("generative")
google_model._client = new_custom_client
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
response = google_model.generate_content(
contents=history,
generation_config=genai.types.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
request_options={"timeout": 600},
)
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
new file mode 100644
index 0000000000..019d453723
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-11b-text-preview
+label:
+ zh_Hans: Llama 3.2 11B Text (Preview)
+ en_US: Llama 3.2 11B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
new file mode 100644
index 0000000000..a44e4ff508
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-1b-preview
+label:
+ zh_Hans: Llama 3.2 1B Text (Preview)
+ en_US: Llama 3.2 1B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
new file mode 100644
index 0000000000..f2fdd0a05e
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-3b-preview
+label:
+ zh_Hans: Llama 3.2 3B Text (Preview)
+ en_US: Llama 3.2 3B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
new file mode 100644
index 0000000000..3b34e7c079
--- /dev/null
+++ b/api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
@@ -0,0 +1,25 @@
+model: llama-3.2-90b-text-preview
+label:
+ zh_Hans: Llama 3.2 90B Text (Preview)
+ en_US: Llama 3.2 90B Text (Preview)
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 131072
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 8192
+pricing:
+ input: '0.05'
+ output: '0.1'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
index 4ad96c4233..b2e6d1b652 100644
--- a/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_hub/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ import numpy as np
import requests
from huggingface_hub import HfApi, InferenceClient
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -18,8 +19,23 @@ HUGGINGFACE_ENDPOINT_API = "https://api.endpoints.huggingface.cloud/v2/endpoint/
class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+    :param input_type: input type of the texts (document or query)
+ :return: embeddings result
+ """
client = InferenceClient(token=credentials["huggingfacehub_api_token"])
execute_model = model
diff --git a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
index 55f3c25804..b8ff3ca549 100644
--- a/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/huggingface_tei/text_embedding/text_embedding.py
@@ -1,6 +1,7 @@
import time
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -23,7 +24,12 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -38,6 +44,7 @@ class HuggingfaceTeiTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
index 1396e59e18..75701ebc54 100644
--- a/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/hunyuan/text_embedding/text_embedding.py
@@ -9,6 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -26,7 +27,12 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +41,7 @@ class HunyuanTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+    :param input_type: input type of the texts (document or query)
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/jina/jina.yaml b/api/core/model_runtime/model_providers/jina/jina.yaml
index 4ff6ba0f22..970b22965b 100644
--- a/api/core/model_runtime/model_providers/jina/jina.yaml
+++ b/api/core/model_runtime/model_providers/jina/jina.yaml
@@ -67,46 +67,3 @@ model_credential_schema:
required: false
type: text-input
default: '8192'
- - variable: task
- label:
- zh_Hans: 下游任务
- en_US: Downstream task
- placeholder:
- zh_Hans: 选择将使用向量模型的下游任务。模型将返回针对该任务优化的向量。
- en_US: Select the downstream task for which the embeddings will be used. The model will return the optimized embeddings for that task.
- required: false
- type: select
- options:
- - value: retrieval.query
- label:
- en_US: retrieval.query
- - value: retrieval.passage
- label:
- en_US: retrieval.passage
- - value: separation
- label:
- en_US: separation
- - value: classification
- label:
- en_US: classification
- - value: text-matching
- label:
- en_US: text-matching
- - variable: dimensions
- label:
- zh_Hans: 输出维度
- en_US: Output dimensions
- placeholder:
- zh_Hans: 输入您的输出维度
- en_US: Enter output dimensions
- required: false
- type: text-input
- - variable: late_chunking
- label:
- zh_Hans: 后期分块
- en_US: Late chunking
- placeholder:
- zh_Hans: 应用后期分块技术来利用模型的长上下文功能来生成上下文块向量化。
- en_US: Apply the late chunking technique to leverage the model's long-context capabilities for generating contextual chunk embeddings.
- required: false
- type: switch
diff --git a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
index 6c96699ea2..b397129512 100644
--- a/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/jina/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -27,7 +28,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.jina.ai/v1"
- def _to_payload(self, model: str, texts: list[str], credentials: dict) -> dict:
+ def _to_payload(self, model: str, texts: list[str], credentials: dict, input_type: EmbeddingInputType) -> dict:
"""
    Build the request payload
@@ -44,23 +45,20 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
data = {"model": model, "input": [transform_jina_input_text(model, text) for text in texts]}
- task = credentials.get("task")
- dimensions = credentials.get("dimensions")
- late_chunking = credentials.get("late_chunking")
-
- if task is not None:
- data["task"] = task
-
- if dimensions is not None:
- data["dimensions"] = int(dimensions)
-
- if late_chunking is not None:
- data["late_chunking"] = late_chunking
+ # model specific parameters
+ if model == "jina-embeddings-v3":
+        # set the `task` parameter according to the input type for the best performance
+ data["task"] = "retrieval.query" if input_type == EmbeddingInputType.QUERY else "retrieval.passage"
return data
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -69,6 +67,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
@@ -81,7 +80,7 @@ class JinaTextEmbeddingModel(TextEmbeddingModel):
url = base_url + "/embeddings"
headers = {"Authorization": "Bearer " + api_key, "Content-Type": "application/json"}
- data = self._to_payload(model=model, texts=texts, credentials=credentials)
+ data = self._to_payload(model=model, texts=texts, credentials=credentials, input_type=input_type)
try:
response = post(url, headers=headers, data=dumps(data))
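
With `_to_payload` wired this way, a jina-embeddings-v3 request differs only in its `task` field depending on how the embeddings will be used; every other Jina model gets no `task` key, so the API default applies. A quick illustration (sketch; the credentials dict is a placeholder):

model = JinaTextEmbeddingModel()
creds = {"api_key": "jina_..."}  # placeholder

doc_payload = model._to_payload("jina-embeddings-v3", ["chunk text"], creds, EmbeddingInputType.DOCUMENT)
assert doc_payload["task"] == "retrieval.passage"

query_payload = model._to_payload("jina-embeddings-v3", ["user question"], creds, EmbeddingInputType.QUERY)
assert query_payload["task"] == "retrieval.query"
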
diff --git a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
index 7d258be81e..ab8ca76c2f 100644
--- a/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/localai/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from yarl import URL
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -22,11 +23,16 @@ from core.model_runtime.model_providers.__base.text_embedding_model import TextE
class LocalAITextEmbeddingModel(TextEmbeddingModel):
"""
- Model class for Jina text embedding model.
+ Model class for LocalAI text embedding model.
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +41,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if len(texts) != 1:
diff --git a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
index 76fd1342bd..74d2a221d1 100644
--- a/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/minimax/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -34,7 +35,12 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.minimax.chat/v1/embeddings"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -43,6 +49,7 @@ class MinimaxTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["minimax_api_key"]
diff --git a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
index 05d9a9a0c6..68b7b448bf 100644
--- a/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/mixedbread/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -27,7 +28,12 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
api_base: str = "https://api.mixedbread.ai/v1"
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -36,6 +42,7 @@ class MixedBreadTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
index ccbfd196a9..857dfb5f41 100644
--- a/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nomic/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from nomic import embed
from nomic import login as nomic_login
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage,
@@ -46,6 +47,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
credentials: dict,
texts: list[str],
user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -54,6 +56,7 @@ class NomicTextEmbeddingModel(_CommonNomic, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
embeddings, prompt_tokens, total_tokens = self.embed_text(
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
index 00cec265d5..936ceb8dd2 100644
--- a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from requests import post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -27,7 +28,12 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
models: list[str] = ["NV-Embed-QA"]
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -36,6 +42,7 @@ class NvidiaTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
api_key = credentials["api_key"]
diff --git a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
index 80ad2be9f5..4de9296cca 100644
--- a/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/oci/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ from typing import Optional
import numpy as np
import oci
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -41,7 +42,12 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -50,6 +56,7 @@ class OCITextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index ff732e6925..a7ea53e0e9 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
if chunk_json["done"]:
# calculate num tokens
- if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
- # transform usage
+ if "prompt_eval_count" in chunk_json:
prompt_tokens = chunk_json["prompt_eval_count"]
- completion_tokens = chunk_json["eval_count"]
else:
- # calculate num tokens
- prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
- completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+ prompt_message_content = prompt_messages[0].content
+ if isinstance(prompt_message_content, str):
+ prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+ else:
+ content_text = ""
+ for message_content in prompt_message_content:
+ if message_content.type == PromptMessageContentType.TEXT:
+ message_content = cast(TextPromptMessageContent, message_content)
+ content_text += message_content.data
+ prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
+ completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
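
The reworked branch decouples the two counters: `prompt_eval_count` and `eval_count` can each be missing independently (Ollama omits `prompt_eval_count` when the prompt was served from cache), and the prompt content may be a list of multimodal parts rather than a plain string. The same fallback logic in isolation (sketch; `gpt2_count` is a stand-in for `self._get_num_tokens_by_gpt2`):

from core.model_runtime.entities.message_entities import PromptMessageContentType

def estimate_tokens(chunk_json: dict, prompt_content, full_text: str, gpt2_count) -> tuple[int, int]:
    # prefer the server-reported counter when present
    if "prompt_eval_count" in chunk_json:
        prompt_tokens = chunk_json["prompt_eval_count"]
    elif isinstance(prompt_content, str):
        prompt_tokens = gpt2_count(prompt_content)
    else:
        # multimodal prompt: count only the text parts
        text = "".join(p.data for p in prompt_content if p.type == PromptMessageContentType.TEXT)
        prompt_tokens = gpt2_count(text)
    completion_tokens = chunk_json.get("eval_count", gpt2_count(full_text))
    return prompt_tokens, completion_tokens
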
diff --git a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
index b4c61d8a6d..5cf3f1c6fa 100644
--- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py
@@ -8,6 +8,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -38,7 +39,12 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -47,6 +53,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
index 535d8388bc..16f1a0cfa1 100644
--- a/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ import numpy as np
import tiktoken
from openai import OpenAI
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -19,7 +20,12 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -28,6 +34,7 @@ class OpenAITextEmbeddingModel(_CommonOpenAI, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# transform credentials to kwargs for model instance
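
Note that across all of these providers the new parameter is appended last with a DOCUMENT default, so the change is backward compatible: call sites that predate this patch keep their old behavior unchanged. Illustratively (hypothetical credentials):

# old-style call, still valid: input_type defaults to EmbeddingInputType.DOCUMENT
result = model._invoke("text-embedding-3-small", creds, ["some chunk"], user="u-123")

# retrieval-time call opting in to query treatment
result = model._invoke(
    "text-embedding-3-small", creds, ["what is dify?"], user="u-123",
    input_type=EmbeddingInputType.QUERY,
)
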
diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
index e83cfdf873..64fa6aaa3c 100644
--- a/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
index 00e583cc79..c5d4330912 100644
--- a/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/openllm/text_embedding/text_embedding.py
@@ -5,6 +5,7 @@ from typing import Optional
from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
@@ -25,7 +26,12 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -34,6 +40,7 @@ class OpenLLMTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
index b62a2d2aaf..1e86f351c8 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -28,7 +29,12 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -37,6 +43,7 @@ class OAICompatEmbeddingModel(_CommonOaiApiCompat, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
index 71b6fb99c4..9f724a77ac 100644
--- a/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/replicate/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -14,8 +15,23 @@ from core.model_runtime.model_providers.replicate._common import _CommonReplicat
class ReplicateEmbeddingModel(_CommonReplicate, TextEmbeddingModel):
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
client = ReplicateClient(api_token=credentials["replicate_api_token"], timeout=30)
if "model_version" in credentials:
diff --git a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
index 04789197ee..97b7692044 100644
--- a/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
+++ b/api/core/model_runtime/model_providers/sagemaker/llm/llm.py
@@ -84,8 +84,9 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
Model class for Cohere large language model.
"""
- sagemaker_client: Any = None
+ sagemaker_session: Any = None
predictor: Any = None
+ sagemaker_endpoint: Optional[str] = None
def _handle_chat_generate_response(
self,
@@ -211,7 +212,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
:param user: unique user id
:return: full response or stream response chunk generator result
"""
- if not self.sagemaker_client:
+ if not self.sagemaker_session:
access_key = credentials.get("aws_access_key_id")
secret_key = credentials.get("aws_secret_access_key")
aws_region = credentials.get("aws_region")
@@ -226,11 +227,14 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
else:
boto_session = boto3.Session()
- self.sagemaker_client = boto_session.client("sagemaker")
- sagemaker_session = Session(boto_session=boto_session, sagemaker_client=self.sagemaker_client)
+ sagemaker_client = boto_session.client("sagemaker")
+ self.sagemaker_session = Session(boto_session=boto_session, sagemaker_client=sagemaker_client)
+
+ if self.sagemaker_endpoint != credentials.get("sagemaker_endpoint"):
+ self.sagemaker_endpoint = credentials.get("sagemaker_endpoint")
self.predictor = Predictor(
- endpoint_name=credentials.get("sagemaker_endpoint"),
- sagemaker_session=sagemaker_session,
+ endpoint_name=self.sagemaker_endpoint,
+ sagemaker_session=self.sagemaker_session,
serializer=serializers.JSONSerializer(),
)
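
The rewrite keeps one long-lived SageMaker Session and treats the Predictor as a cache keyed by endpoint name, so switching endpoints in credentials rebuilds the predictor without recreating the boto session. The same invalidation pattern in isolation (sketch; names are illustrative):

from sagemaker import serializers
from sagemaker.predictor import Predictor

class PredictorCache:
    def __init__(self, sagemaker_session):
        self.session = sagemaker_session
        self.endpoint = None
        self.predictor = None

    def get(self, endpoint_name: str) -> Predictor:
        # rebuild only when the configured endpoint actually changed
        if endpoint_name != self.endpoint:
            self.endpoint = endpoint_name
            self.predictor = Predictor(
                endpoint_name=endpoint_name,
                sagemaker_session=self.session,
                serializer=serializers.JSONSerializer(),
            )
        return self.predictor
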
diff --git a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
index d55144f8a7..8f993ce672 100644
--- a/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/sagemaker/text_embedding/text_embedding.py
@@ -6,6 +6,7 @@ from typing import Any, Optional
import boto3
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -53,7 +54,12 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
return embeddings
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -62,6 +68,7 @@ class SageMakerEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
# get model properties
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
index 43db4aed11..a3e5d0981f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/_position.yaml
@@ -1,25 +1,38 @@
-- Qwen/Qwen2.5-7B-Instruct
-- Qwen/Qwen2.5-14B-Instruct
-- Qwen/Qwen2.5-32B-Instruct
- Qwen/Qwen2.5-72B-Instruct
+- Qwen/Qwen2.5-Math-72B-Instruct
+- Qwen/Qwen2.5-32B-Instruct
+- Qwen/Qwen2.5-14B-Instruct
+- Qwen/Qwen2.5-7B-Instruct
+- Qwen/Qwen2.5-Coder-7B-Instruct
+- deepseek-ai/DeepSeek-V2.5
- Qwen/Qwen2-72B-Instruct
- Qwen/Qwen2-57B-A14B-Instruct
- Qwen/Qwen2-7B-Instruct
- Qwen/Qwen2-1.5B-Instruct
-- 01-ai/Yi-1.5-34B-Chat
-- 01-ai/Yi-1.5-9B-Chat-16K
-- 01-ai/Yi-1.5-6B-Chat
-- THUDM/glm-4-9b-chat
-- deepseek-ai/DeepSeek-V2.5
- deepseek-ai/DeepSeek-V2-Chat
- deepseek-ai/DeepSeek-Coder-V2-Instruct
+- THUDM/glm-4-9b-chat
+- THUDM/chatglm3-6b
+- 01-ai/Yi-1.5-34B-Chat-16K
+- 01-ai/Yi-1.5-9B-Chat-16K
+- 01-ai/Yi-1.5-6B-Chat
+- internlm/internlm2_5-20b-chat
- internlm/internlm2_5-7b-chat
-- google/gemma-2-27b-it
-- google/gemma-2-9b-it
-- meta-llama/Meta-Llama-3-70B-Instruct
-- meta-llama/Meta-Llama-3-8B-Instruct
- meta-llama/Meta-Llama-3.1-405B-Instruct
- meta-llama/Meta-Llama-3.1-70B-Instruct
- meta-llama/Meta-Llama-3.1-8B-Instruct
-- mistralai/Mixtral-8x7B-Instruct-v0.1
+- meta-llama/Meta-Llama-3-70B-Instruct
+- meta-llama/Meta-Llama-3-8B-Instruct
+- google/gemma-2-27b-it
+- google/gemma-2-9b-it
- mistralai/Mistral-7B-Instruct-v0.2
+- Pro/Qwen/Qwen2-7B-Instruct
+- Pro/Qwen/Qwen2-1.5B-Instruct
+- Pro/THUDM/glm-4-9b-chat
+- Pro/THUDM/chatglm3-6b
+- Pro/01-ai/Yi-1.5-9B-Chat-16K
+- Pro/01-ai/Yi-1.5-6B-Chat
+- Pro/internlm/internlm2_5-7b-chat
+- Pro/meta-llama/Meta-Llama-3.1-8B-Instruct
+- Pro/meta-llama/Meta-Llama-3-8B-Instruct
+- Pro/google/gemma-2-9b-it
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
index 27664eab6c..89fb153ba0 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-7b-instruct-v0.2.yaml
@@ -28,3 +28,4 @@ pricing:
output: '0'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
index fd7aada428..2785e7496f 100644
--- a/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
+++ b/api/core/model_runtime/model_providers/siliconflow/llm/mistral-8x7b-instruct-v0.1.yaml
@@ -28,3 +28,4 @@ pricing:
output: '1.26'
unit: '0.000001'
currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
index 6cdf4933b4..c5dcc12610 100644
--- a/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/siliconflow/text_embedding/text_embedding.py
@@ -1,5 +1,6 @@
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel,
@@ -16,8 +17,23 @@ class SiliconflowTextEmbeddingModel(OAICompatEmbeddingModel):
super().validate_credentials(model, credentials)
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
+ :return: embeddings result
+ """
self._add_custom_parameters(credentials)
- return super()._invoke(model, credentials, texts, user)
+ return super()._invoke(model, credentials, texts, user, input_type)
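
Because the SiliconFlow override only injects custom parameters before delegating, it must forward `input_type` explicitly; dropping it would silently embed every query with the DOCUMENT default. The general pattern for such thin wrappers (sketch; class name is illustrative):

class ThinWrapperEmbeddingModel(OAICompatEmbeddingModel):
    def _invoke(self, model, credentials, texts, user=None,
                input_type=EmbeddingInputType.DOCUMENT):
        self._add_custom_parameters(credentials)
        # forward every parameter, including newly added ones
        return super()._invoke(model, credentials, texts, user, input_type)
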
diff --git a/api/core/model_runtime/model_providers/spark/llm/llm.py b/api/core/model_runtime/model_providers/spark/llm/llm.py
index 57193dc031..1181ba699a 100644
--- a/api/core/model_runtime/model_providers/spark/llm/llm.py
+++ b/api/core/model_runtime/model_providers/spark/llm/llm.py
@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel):
:param prompt_messages: prompt messages
:return: llm response chunk generator result
"""
+ completion = ""
for index, content in enumerate(client.subscribe()):
if isinstance(content, dict):
delta = content["data"]
else:
delta = content
-
+ completion += delta
assistant_prompt_message = AssistantPromptMessage(
content=delta or "",
)
-
+ temp_assistant_prompt_message = AssistantPromptMessage(
+ content=completion,
+ )
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
- completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
+ completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message])
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
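
Before this change, completion_tokens was computed from the latest delta alone, so the usage attached to each streamed chunk reflected only the most recent fragment. Counting the accumulated completion instead makes per-chunk usage grow toward the true total (illustrative counts):

# stream deltas: "Hello", " world", "!"
# old: completion_tokens per chunk ≈ 1, 1, 1   (each delta counted in isolation)
# new: completion_tokens per chunk ≈ 1, 2, 3   (running completion counted)
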
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
index d0ff443827..34a57d1fc0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/farui-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: farui-plus
label:
en_US: farui-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
index f90c7f075f..3e3585b30a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py
+++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py
@@ -18,7 +18,7 @@ from dashscope.common.error import (
UnsupportedModel,
)
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
@@ -35,6 +35,7 @@ from core.model_runtime.entities.model_entities import (
FetchFrom,
I18nObject,
ModelFeature,
+ ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
@@ -97,6 +98,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling
:return:
"""
+ # Check if the model was added via get_customizable_model_schema
+ if self.get_customizable_model_schema(model, credentials) is not None:
+ # For custom models, tokens are not calculated.
+ return 0
+
if model in {"qwen-turbo-chat", "qwen-plus-chat"}:
model = model.replace("-chat", "")
if model == "farui-plus":
@@ -537,55 +543,51 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param credentials: model credentials
:return: AIModelEntity or None
"""
- rules = [
- ParameterRule(
- name="temperature",
- type=ParameterType.FLOAT,
- use_template="temperature",
- label=I18nObject(zh_Hans="温度", en_US="Temperature"),
- ),
- ParameterRule(
- name="top_p",
- type=ParameterType.FLOAT,
- use_template="top_p",
- label=I18nObject(zh_Hans="Top P", en_US="Top P"),
- ),
- ParameterRule(
- name="top_k",
- type=ParameterType.INT,
- min=0,
- max=99,
- label=I18nObject(zh_Hans="top_k", en_US="top_k"),
- ),
- ParameterRule(
- name="max_tokens",
- type=ParameterType.INT,
- min=1,
- max=128000,
- default=1024,
- label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
- ),
- ParameterRule(
- name="seed",
- type=ParameterType.INT,
- default=1234,
- label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"),
- ),
- ParameterRule(
- name="repetition_penalty",
- type=ParameterType.FLOAT,
- default=1.1,
- label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"),
- ),
- ]
-
- entity = AIModelEntity(
+ return AIModelEntity(
model=model,
- label=I18nObject(en_US=model),
- fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ label=I18nObject(en_US=model, zh_Hans=model),
model_type=ModelType.LLM,
- model_properties={},
- parameter_rules=rules,
+ features=[ModelFeature.TOOL_CALL, ModelFeature.MULTI_TOOL_CALL, ModelFeature.STREAM_TOOL_CALL]
+ if credentials.get("function_calling_type") == "tool_call"
+ else [],
+ fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+ model_properties={
+ ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", 8000)),
+ ModelPropertyKey.MODE: LLMMode.CHAT.value,
+ },
+ parameter_rules=[
+ ParameterRule(
+ name="temperature",
+ use_template="temperature",
+ label=I18nObject(en_US="Temperature", zh_Hans="温度"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="max_tokens",
+ use_template="max_tokens",
+ default=512,
+ min=1,
+ max=int(credentials.get("max_tokens", 1024)),
+ label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
+ type=ParameterType.INT,
+ ),
+ ParameterRule(
+ name="top_p",
+ use_template="top_p",
+ label=I18nObject(en_US="Top P", zh_Hans="Top P"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="top_k",
+ use_template="top_k",
+ label=I18nObject(en_US="Top K", zh_Hans="Top K"),
+ type=ParameterType.FLOAT,
+ ),
+ ParameterRule(
+ name="frequency_penalty",
+ use_template="frequency_penalty",
+ label=I18nObject(en_US="Frequency Penalty", zh_Hans="重复惩罚"),
+ type=ParameterType.FLOAT,
+ ),
+ ],
)
-
- return entity
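
The rewritten schema derives everything from the user-supplied credentials instead of hard-coding empty model properties: context size, chat mode, and tool-calling features all come from the credentials dict. A sketch of how a custom Tongyi-compatible model might be registered (keys match what the code above reads; values are hypothetical):

credentials = {
    "context_size": "32000",               # -> ModelPropertyKey.CONTEXT_SIZE
    "max_tokens": "2048",                  # -> upper bound of the max_tokens rule
    "function_calling_type": "tool_call",  # -> enables TOOL_CALL / MULTI_TOOL_CALL / STREAM_TOOL_CALL
}
entity = TongyiLargeLanguageModel().get_customizable_model_schema("my-custom-qwen", credentials)

Together with the early return added to the token-counting path above, such customizable models skip local token estimation entirely and report zero tokens.
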
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
index d9792e71ee..64a3f33133 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-0919
label:
en_US: qwen-coder-turbo-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
index 0b03505c45..a4c93f7047 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo-latest
label:
en_US: qwen-coder-turbo-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
index 2a6c040853..ff68faed80 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-coder-turbo
label:
en_US: qwen-coder-turbo
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
index bad7f4f472..c3dbb3616f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml
@@ -1,4 +1,4 @@
-# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-long
label:
en_US: qwen-long
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
index c14aee1e1e..42fe1f6862 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0816
label:
en_US: qwen-math-plus-0816
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
index 9d74eeca3e..9b6567b8cd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-0919
label:
en_US: qwen-math-plus-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
index b8601a969a..b2a2393b36 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus-latest
label:
en_US: qwen-math-plus-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
index 4a948be597..63f4b7ff0a 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-plus
label:
en_US: qwen-math-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
index bffe324a96..4da90eec3e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-0919
label:
en_US: qwen-math-turbo-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
index 0747e96614..d29f8851dd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo-latest
label:
en_US: qwen-math-turbo-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
index dffb5557ff..2a8f7f725e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-math-turbo
label:
en_US: qwen-math-turbo
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
index 8ae159f1bf..ef1841b517 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0107
label:
en_US: qwen-max-0107
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
index 93fb37254e..a2ea5df130 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0403, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0403
label:
en_US: qwen-max-0403
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
index a5c9d49609..a467665f11 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0428, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0428
label:
en_US: qwen-max-0428
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
index e4a6dae637..78661eaea0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-0919
label:
en_US: qwen-max-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
index 6fae8a7d38..6f4674576b 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-1201
label:
en_US: qwen-max-1201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
index 8e20968859..8b5f005473 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-latest
label:
en_US: qwen-max-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
index 9bc50c73fc..098494ff95 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max-longcontext
label:
en_US: qwen-max-longcontext
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
index c6a64dc507..9d0d3f8db3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-max, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#cf6cc4aa2aokf)
model: qwen-max
label:
en_US: qwen-max
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
index 430599300b..0b1a6f81df 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0206
label:
en_US: qwen-plus-0206
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
index 906995d2b9..7706005bb5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0624
label:
en_US: qwen-plus-0624
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
index b33e725dd0..348276fc08 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0723, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0723
label:
en_US: qwen-plus-0723
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
index bb394fad81..29f125135e 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0806, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0806
label:
en_US: qwen-plus-0806
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
index 118e304a97..905fa1e102 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-0919
label:
en_US: qwen-plus-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
index 761312bc38..c7a3549727 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-chat
label:
en_US: qwen-plus-chat
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
index 430872fb31..608f52c296 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus-latest
label:
en_US: qwen-plus-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
index f3fce30209..9089e57255 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-plus, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#bb0ffee88bwnk)
model: qwen-plus
label:
en_US: qwen-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
index 2628d824fe..7ee0d44f2f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml
@@ -1,3 +1,6 @@
+# this model corresponds to qwen-turbo-0206, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
+
model: qwen-turbo-0206
label:
en_US: qwen-turbo-0206
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
index 8097459bf0..20a3f7eb64 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0624, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0624
label:
en_US: qwen-turbo-0624
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
index e43beeb195..ba73dec363 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-0919, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-0919
label:
en_US: qwen-turbo-0919
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
index c30cb7ca10..d785b7fe85 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-chat
label:
en_US: qwen-turbo-chat
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
index e443d6888b..fe38a4283c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo-latest, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo-latest
label:
en_US: qwen-turbo-latest
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
index 33f05967c2..215c9ec5fc 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml
@@ -1,3 +1,5 @@
+# this model corresponds to qwen-turbo, for more details
+# please refer to (https://help.aliyun.com/zh/model-studio/getting-started/models#ff492e2c10lub)
model: qwen-turbo
label:
en_US: qwen-turbo
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
index 63b6074d0d..d80168ffc3 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0201
label:
en_US: qwen-vl-max-0201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
index fd20377002..50e10226a5 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max-0809
label:
en_US: qwen-vl-max-0809
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
index 31a9fb51bb..21b127f56c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-max
label:
en_US: qwen-vl-max
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
index 5f90cf48bc..03cb039d15 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0201
label:
en_US: qwen-vl-plus-0201
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
index 97820c0f3a..67b2b2ebdd 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus-0809
label:
en_US: qwen-vl-plus-0809
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
index 6af36cd6f3..f55764c6c0 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen-vl-plus
label:
en_US: qwen-vl-plus
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
index 158e2c7ee1..ea157f42de 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-1.5b-instruct
label:
en_US: qwen2-math-1.5b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
index e26a6923d1..37052a9233 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-72b-instruct
label:
en_US: qwen2-math-72b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
index 589119b26e..e182f1c27f 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2-math-7b-instruct
label:
en_US: qwen2-math-7b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
index dd608fbf76..9e75ccc1f2 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-0.5b-instruct
label:
en_US: qwen2.5-0.5b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
index 08237b3958..67c9d31243 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-1.5b-instruct
label:
en_US: qwen2.5-1.5b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
index 640b019703..2a38be921c 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-14b-instruct
label:
en_US: qwen2.5-14b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
index 3a90ca7532..e6e4fbf978 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-32b-instruct
label:
en_US: qwen2.5-32b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
index b79755eb9b..8f250379a7 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-3b-instruct
label:
en_US: qwen2.5-3b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
index e9dd51a341..bb3cdd6141 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-72b-instruct
label:
en_US: qwen2.5-72b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
index 04f26cf5fe..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
index 04f26cf5fe..fdcd3d4275 100644
--- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models
model: qwen2.5-7b-instruct
label:
en_US: qwen2.5-7b-instruct
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
index f4303c53d3..52e35d8b50 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v1.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v1
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
index f6be3544ed..5bb6a8f424 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v2.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v2
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
index 171a379ee2..d8af0e2b63 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text-embedding-v3.yaml
@@ -1,3 +1,4 @@
+# for more details, please refer to https://help.aliyun.com/zh/model-studio/getting-started/models#3383780daf8hw
model: text-embedding-v3
model_type: text-embedding
model_properties:
diff --git a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
index 5783d2e383..736cd44df8 100644
--- a/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/tongyi/text_embedding/text_embedding.py
@@ -4,6 +4,7 @@ from typing import Optional
import dashscope
import numpy as np
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage,
@@ -27,6 +28,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
credentials: dict,
texts: list[str],
user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -35,6 +37,7 @@ class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
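Every embedding provider in this patch gains the same defaulted `input_type` parameter. As a rough sketch (the string values are assumptions; only the member names `DOCUMENT` and `QUERY` and the import path are visible in the diff), the new constant and a provider-side use might look like:

```python
from enum import Enum


class EmbeddingInputType(Enum):
    # Assumed shape of core/embedding/embedding_constant.py: only these two
    # members appear anywhere in this patch; the values are illustrative.
    DOCUMENT = "document"  # embedding stored chunks at indexing time
    QUERY = "query"        # embedding the user's query at retrieval time


def resolve_text_type(input_type: EmbeddingInputType) -> str:
    # Provider-side sketch: map the enum onto whatever mode string an
    # upstream embedding API expects (the mode names are hypothetical).
    return "query" if input_type is EmbeddingInputType.QUERY else "document"
```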
diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
index fabe6d90e6..1a09c20fd9 100644
--- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
+++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml
@@ -37,14 +37,51 @@ model_credential_schema:
en_US: Model Name
zh_Hans: 模型名称
placeholder:
- en_US: Enter full model name
- zh_Hans: 输入模型全称
+ en_US: Enter your model name
+ zh_Hans: 输入模型名称
credential_form_schemas:
- variable: dashscope_api_key
- required: true
label:
en_US: API Key
type: secret-input
+ required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
+ - variable: context_size
+ label:
+ zh_Hans: 模型上下文长度
+ en_US: Model context size
+ required: true
+ type: text-input
+ default: '4096'
+ placeholder:
+ zh_Hans: 在此输入您的模型上下文长度
+ en_US: Enter your model context size
+ - variable: max_tokens
+ label:
+ zh_Hans: 最大 token 上限
+ en_US: Upper bound for max tokens
+ default: '4096'
+ type: text-input
+ show_on:
+ - variable: __model_type
+ value: llm
+ - variable: function_calling_type
+ label:
+ en_US: Function calling
+ type: select
+ required: false
+ default: no_call
+ options:
+ - value: no_call
+ label:
+ en_US: Not Supported
+ zh_Hans: 不支持
+ - value: function_call
+ label:
+ en_US: Supported
+ zh_Hans: 支持
+ show_on:
+ - variable: __model_type
+ value: llm
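These new schema fields turn Tongyi into a customizable-model provider: users declare `context_size`, `max_tokens`, and `function_calling_type` per model, and `show_on` restricts the latter two to the `llm` model type. A hedged sketch of how a model class might consume the stored values (the helper is illustrative, not the actual runtime API):

```python
def read_custom_model_credentials(credentials: dict) -> dict:
    # Hypothetical helper. Text-input fields arrive as strings, hence the
    # int() casts; the fallbacks mirror the schema's '4096' defaults.
    return {
        "context_size": int(credentials.get("context_size") or 4096),
        "max_tokens": int(credentials.get("max_tokens") or 4096),
        "supports_function_call": credentials.get("function_calling_type") == "function_call",
    }
```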
diff --git a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
index edd4a36d98..b6509cd26c 100644
--- a/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ import numpy as np
from openai import OpenAI
from tokenizers import Tokenizer
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -22,7 +23,14 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
def _get_tokenizer(self) -> Tokenizer:
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
- def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
+ def _invoke(
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: str | None = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
+ ) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -30,6 +38,7 @@ class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
similarity index 96%
rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml
rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
index c308f0a322..f5386be06d 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-001.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-flash-001
label:
- en_US: Gemini 1.5 Flash
+ en_US: Gemini 1.5 Flash 001
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
new file mode 100644
index 0000000000..97bd44f06b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-flash-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-flash-002
+label:
+ en_US: Gemini 1.5 Flash 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
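All four new Gemini entries share the same parameter rules and a zeroed price table. Assuming they follow the multiplicative pricing convention used by the runtime's other model YAMLs (an assumption worth flagging), `unit: '0.000001'` makes `input`/`output` read as per-million-token prices:

```python
from decimal import Decimal


def price_for(tokens: int, unit_price: str, unit: str) -> Decimal:
    # Sketch of the convention: cost = tokens * price * unit, so a unit of
    # '0.000001' scales a per-million-token price down to per-token.
    return Decimal(tokens) * Decimal(unit_price) * Decimal(unit)


price_for(1_000_000, "0.00", "0.000001")  # evaluates to zero while unpriced
```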
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
similarity index 96%
rename from api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml
rename to api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
index 744863e773..5e08f2294e 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-001.yaml
@@ -1,6 +1,6 @@
model: gemini-1.5-pro-001
label:
- en_US: Gemini 1.5 Pro
+ en_US: Gemini 1.5 Pro 001
model_type: llm
features:
- agent-thought
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
new file mode 100644
index 0000000000..8f327ea2f3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-1.5-pro-002.yaml
@@ -0,0 +1,37 @@
+model: gemini-1.5-pro-002
+label:
+ en_US: Gemini 1.5 Pro 002
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml
new file mode 100644
index 0000000000..0f5eb34c0c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-flash-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-flash-experimental
+label:
+ en_US: Gemini Flash Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml
new file mode 100644
index 0000000000..fa31cabb85
--- /dev/null
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/gemini-pro-experimental.yaml
@@ -0,0 +1,37 @@
+model: gemini-pro-experimental
+label:
+ en_US: Gemini Pro Experimental
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 1048576
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: top_k
+ label:
+ en_US: Top k
+ type: int
+ help:
+ en_US: Only sample from the top K options for each subsequent token.
+ required: false
+ - name: presence_penalty
+ use_template: presence_penalty
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ - name: max_output_tokens
+ use_template: max_tokens
+ required: true
+ default: 8192
+ min: 1
+ max: 8192
+pricing:
+ input: '0.00'
+ output: '0.00'
+ unit: '0.000001'
+ currency: USD
diff --git a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
index da69b7cdf3..1dd785d545 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/vertex_ai/llm/llm.py
@@ -2,6 +2,7 @@ import base64
import io
import json
import logging
+import time
from collections.abc import Generator
from typing import Optional, Union, cast
@@ -20,7 +21,6 @@ from google.api_core import exceptions
from google.cloud import aiplatform
from google.oauth2 import service_account
from PIL import Image
-from vertexai.generative_models import HarmBlockThreshold, HarmCategory
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
@@ -34,6 +34,7 @@ from core.model_runtime.entities.message_entities import (
ToolPromptMessage,
UserPromptMessage,
)
+from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
@@ -503,20 +504,12 @@ class VertexAiLargeLanguageModel(LargeLanguageModel):
else:
history.append(content)
- safety_settings = {
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
- }
-
google_model = glm.GenerativeModel(model_name=model, system_instruction=system_instruction)
response = google_model.generate_content(
contents=history,
generation_config=glm.GenerationConfig(**config_kwargs),
stream=stream,
- safety_settings=safety_settings,
tools=self._convert_tools_to_glm_tool(tools) if tools else None,
)
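With the explicit `safety_settings` gone, `generate_content` falls back to Vertex AI's default harm-blocking thresholds instead of `BLOCK_NONE` across the board. For anyone who depended on the old permissive behavior, the removed mapping (reproduced from the deleted lines, reference only) could be passed back in at a call site:

```python
from vertexai.generative_models import HarmBlockThreshold, HarmCategory

# The mapping this patch removes; restoring it re-enables the old
# no-blocking behavior for a specific generate_content call.
permissive_safety_settings = {
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
}
```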
diff --git a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
index 519373a7f3..fce9544df0 100644
--- a/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/vertex_ai/text_embedding/text_embedding.py
@@ -9,6 +9,7 @@ from google.cloud import aiplatform
from google.oauth2 import service_account
from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -30,7 +31,12 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -38,6 +44,8 @@ class VertexAiTextEmbeddingModel(_CommonVertexAi, TextEmbeddingModel):
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
+ :param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
service_account_info = json.loads(base64.b64decode(credentials["vertex_service_account_key"]))
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
index 9cba2cb879..0dd4037c95 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/text_embedding/text_embedding.py
@@ -2,6 +2,7 @@ import time
from decimal import Decimal
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,
@@ -41,7 +42,12 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -50,6 +56,7 @@ class VolcengineMaaSTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
if ArkClientV3.is_legacy(credentials):
diff --git a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
index 4d6f6dccd0..c21d0c0552 100644
--- a/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/wenxin/text_embedding/text_embedding.py
@@ -7,6 +7,7 @@ from typing import Any, Optional
import numpy as np
from requests import Response, post
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeError
@@ -70,7 +71,12 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel):
return WenxinTextEmbedding(api_key, secret_key)
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -79,6 +85,7 @@ class WenxinTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
diff --git a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
index 8043af1d6c..1627239132 100644
--- a/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py
@@ -3,6 +3,7 @@ from typing import Optional
from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
@@ -25,7 +26,12 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -40,6 +46,7 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
server_url = credentials["server_url"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
index ee20954381..14a529dddf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/zhipuai/text_embedding/text_embedding.py
@@ -1,6 +1,7 @@
import time
from typing import Optional
+from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
@@ -15,7 +16,12 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel):
"""
def _invoke(
- self, model: str, credentials: dict, texts: list[str], user: Optional[str] = None
+ self,
+ model: str,
+ credentials: dict,
+ texts: list[str],
+ user: Optional[str] = None,
+ input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
) -> TextEmbeddingResult:
"""
Invoke text embedding model
@@ -24,6 +30,7 @@ class ZhipuAITextEmbeddingModel(_CommonZhipuaiAI, TextEmbeddingModel):
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
+ :param input_type: input type
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
index 612542dab1..6dcd98dcfd 100644
--- a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
+++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
@@ -40,19 +40,8 @@ class AnalyticdbConfig(BaseModel):
class AnalyticdbVector(BaseVector):
- _instance = None
- _init = False
-
- def __new__(cls, *args, **kwargs):
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- return cls._instance
-
def __init__(self, collection_name: str, config: AnalyticdbConfig):
- # collection_name must be updated every time
self._collection_name = collection_name.lower()
- if AnalyticdbVector._init:
- return
try:
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models
@@ -62,7 +51,6 @@ class AnalyticdbVector(BaseVector):
self._client_config = open_api_models.Config(user_agent="dify", **config.to_analyticdb_client_params())
self._client = Client(self._client_config)
self._initialize()
- AnalyticdbVector._init = True
def _initialize(self) -> None:
cache_key = f"vector_indexing_{self.config.instance_id}"
@@ -257,11 +245,14 @@ class AnalyticdbVector(BaseVector):
documents = []
for match in response.body.matches.match:
if match.score > score_threshold:
+ metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
- metadata=json.loads(match.metadata.get("metadata_")),
+ metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
@@ -286,12 +277,14 @@ class AnalyticdbVector(BaseVector):
for match in response.body.matches.match:
if match.score > score_threshold:
metadata = json.loads(match.metadata.get("metadata_"))
+ metadata["score"] = match.score
doc = Document(
page_content=match.metadata.get("page_content"),
vector=match.metadata.get("vector"),
metadata=metadata,
)
documents.append(doc)
+ documents = sorted(documents, key=lambda x: x.metadata["score"], reverse=True)
return documents
def delete(self) -> None:
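Dropping the `__new__` singleton and the `_init` flag fixes a shared-state bug: every construction previously returned the same object, so creating a vector store for a second collection silently re-pointed the first one's `_collection_name` while skipping client re-initialization. Illustratively:

```python
# Illustrative only; `config` stands for a valid AnalyticdbConfig.
a = AnalyticdbVector("dataset_a_collection", config)
b = AnalyticdbVector("dataset_b_collection", config)
# old: a is b -> True, and a._collection_name was silently re-pointed to
#      "dataset_b_collection" while client setup was skipped
# new: a is b -> False; each store owns its own client and collection name
```

The same file also stamps `match.score` into each document's metadata and sorts results by it descending, so callers receive ranked hits from both the vector and full-text paths.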
diff --git a/api/core/rag/datasource/vdb/vector_base.py b/api/core/rag/datasource/vdb/vector_base.py
index 1a0dc7f48b..22e191340d 100644
--- a/api/core/rag/datasource/vdb/vector_base.py
+++ b/api/core/rag/datasource/vdb/vector_base.py
@@ -45,6 +45,7 @@ class BaseVector(ABC):
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
raise NotImplementedError
+ @abstractmethod
def delete(self) -> None:
raise NotImplementedError
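Promoting `delete` to an `@abstractmethod` moves the failure earlier: a vector-store subclass that forgets to implement it now fails at instantiation rather than raising `NotImplementedError` only when `delete()` is eventually called. A minimal reproduction:

```python
from abc import ABC, abstractmethod


class BaseVectorSketch(ABC):  # stand-in for BaseVector
    @abstractmethod
    def delete(self) -> None:
        raise NotImplementedError


class IncompleteVector(BaseVectorSketch):
    pass  # no delete() override


IncompleteVector()  # TypeError: Can't instantiate abstract class IncompleteVector
```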
diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py
index fe7eaa32e6..0ffc89b214 100644
--- a/api/core/rag/extractor/extract_processor.py
+++ b/api/core/rag/extractor/extract_processor.py
@@ -124,7 +124,7 @@ class ExtractProcessor:
extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url)
elif file_extension == ".xml":
extractor = UnstructuredXmlExtractor(file_path, unstructured_api_url)
- elif file_extension == "epub":
+ elif file_extension == ".epub":
extractor = UnstructuredEpubExtractor(file_path, unstructured_api_url)
else:
# txt
@@ -146,7 +146,7 @@ class ExtractProcessor:
extractor = WordExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
elif file_extension == ".csv":
extractor = CSVExtractor(file_path, autodetect_encoding=True)
- elif file_extension == "epub":
+ elif file_extension == ".epub":
extractor = UnstructuredEpubExtractor(file_path)
else:
# txt
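The `.epub` fix matters because extension helpers keep the leading dot, so the bare `"epub"` comparison could never match (assuming `file_extension` is derived the usual way, e.g. via `pathlib` or `os.path.splitext`):

```python
from pathlib import Path

Path("/tmp/novel.epub").suffix  # ".epub" — equal to ".epub", never to "epub"
```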
diff --git a/api/core/tools/provider/builtin/comfyui/comfyui.yaml b/api/core/tools/provider/builtin/comfyui/comfyui.yaml
index 066fd85308..3891eebf3a 100644
--- a/api/core/tools/provider/builtin/comfyui/comfyui.yaml
+++ b/api/core/tools/provider/builtin/comfyui/comfyui.yaml
@@ -39,4 +39,4 @@ credentials_for_provider:
en_US: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors
zh_Hans: ComfyUI服务器的模型名称, 比如 xxx.safetensors
pt_BR: The checkpoint name of the ComfyUI server, e.g. xxx.safetensors
- url: https://docs.dify.ai/tutorials/tool-configuration/comfyui
+ url: https://github.com/comfyanonymous/ComfyUI#installing
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
index 58ad6d8694..589bc3433d 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_reader.yaml
@@ -2,14 +2,14 @@ identity:
name: jina_reader
author: Dify
label:
- en_US: JinaReader
- zh_Hans: JinaReader
- pt_BR: JinaReader
+ en_US: Fetch Single Page
+ zh_Hans: 获取单页面
+ pt_BR: Fetch Single Page
description:
human:
- en_US: Convert any URL to an LLM-friendly input. Experience improved output for your agent and RAG systems at no cost.
- zh_Hans: 将任何 URL 转换为 LLM 友好的输入。无需付费即可体验为您的 Agent 和 RAG 系统提供的改进输出。
- pt_BR: Converta qualquer URL em uma entrada amigável ao LLM. Experimente uma saída aprimorada para seus sistemas de agente e RAG sem custo.
+ en_US: Fetch the target URL (can be a PDF) and convert it into LLM-friendly Markdown.
+ zh_Hans: 获取目标网址(可以是 PDF),并将其转换为适合大模型处理的 Markdown 格式。
+ pt_BR: Busque a URL de destino (que pode ser um PDF) e converta-a em Markdown amigável a LLMs.
llm: A tool for scraping webpages. Input should be a URL.
parameters:
- name: url
@@ -17,13 +17,13 @@ parameters:
required: true
label:
en_US: URL
- zh_Hans: 网页链接
+ zh_Hans: 网址
pt_BR: URL
human_description:
- en_US: used for linking to webpages
- zh_Hans: 用于链接到网页
- pt_BR: used for linking to webpages
- llm_description: url for scraping
+ en_US: Web link
+ zh_Hans: 网页链接
+ pt_BR: URL da web
+ llm_description: url for scraping
form: llm
- name: request_params
type: string
@@ -31,14 +31,14 @@ parameters:
label:
en_US: Request params
zh_Hans: 请求参数
- pt_BR: Request params
+ pt_BR: Parâmetros de solicitação
human_description:
en_US: |
request parameters, format: {"key1": "value1", "key2": "value2"}
zh_Hans: |
请求参数,格式:{"key1": "value1", "key2": "value2"}
pt_BR: |
- request parameters, format: {"key1": "value1", "key2": "value2"}
+ parâmetros de solicitação, formato: {"key1": "value1", "key2": "value2"}
llm_description: request parameters
form: llm
- name: target_selector
@@ -51,7 +51,7 @@ parameters:
human_description:
en_US: css selector for scraping specific elements
zh_Hans: css 选择器用于抓取特定元素
- pt_BR: css selector for scraping specific elements
+ pt_BR: css selector para scraping de elementos específicos
llm_description: css selector of the target element to scrape
form: form
- name: wait_for_selector
@@ -64,7 +64,7 @@ parameters:
human_description:
en_US: css selector for waiting for specific elements
zh_Hans: css 选择器用于等待特定元素
- pt_BR: css selector for waiting for specific elements
+ pt_BR: css selector para aguardar elementos específicos
llm_description: css selector of the target element to wait for
form: form
- name: image_caption
@@ -77,8 +77,8 @@ parameters:
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
- pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
+ zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。"
+ pt_BR: "Adiciona legendas a todas as imagens na URL especificada, adicionando 'Imagem [idx]: [legenda]' como uma tag alt para aquelas que não têm uma. Isso permite que os modelos LLM inferiores interajam com as imagens em atividades como raciocínio e resumo."
llm_description: Captions all images at the specified URL
form: form
- name: gather_all_links_at_the_end
@@ -91,8 +91,8 @@ parameters:
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
- zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
- pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
+ zh_Hans: 末尾将添加“按钮和链接”部分,方便下游模型或网络代理做页面导航或执行进一步操作。
+ pt_BR: Um "Botões & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
@@ -105,8 +105,8 @@ parameters:
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
- zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
- pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+ zh_Hans: 末尾会新增“图片”部分,方便下游模型全面了解页面的视觉内容,提升推理效果。
+ pt_BR: Um "Imagens" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
llm_description: Gather all images at the end
form: form
- name: proxy_server
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
index 2bc70e1be1..e58c639e56 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_search.yaml
@@ -2,13 +2,14 @@ identity:
name: jina_search
author: Dify
label:
- en_US: JinaSearch
- zh_Hans: JinaSearch
- pt_BR: JinaSearch
+ en_US: Search the web
+ zh_Hans: 联网搜索
+ pt_BR: Search the web
description:
human:
- en_US: Search on the web and get the top 5 results. Useful for grounding using information from the web.
- zh_Hans: 在网络上搜索返回前 5 个结果。
+ en_US: Search the public web for a given query and return the top results as LLM-friendly Markdown.
+ zh_Hans: 针对给定的查询在互联网上进行搜索,并以适合大模型处理的 Markdown 格式返回最相关的结果。
+ pt_BR: Pesquisa na web pública por uma consulta fornecida e retorna os melhores resultados em Markdown amigável a LLMs.
llm: A tool for searching results on the web for grounding. Input should be a simple question.
parameters:
- name: query
@@ -16,11 +17,13 @@ parameters:
required: true
label:
en_US: Question (Query)
- zh_Hans: 信息查询
+ zh_Hans: 查询
+ pt_BR: Pergunta (Consulta)
human_description:
en_US: used to find information on the web
zh_Hans: 在网络上搜索信息
- llm_description: simple question to ask on the web
+ pt_BR: Usado para encontrar informações na web
+ llm_description: simple question to ask on the web
form: llm
- name: image_caption
type: boolean
@@ -32,7 +35,7 @@ parameters:
pt_BR: Legenda da imagem
human_description:
en_US: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
- zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签。这允许下游 LLM 在推理和总结等活动中与图像进行交互。"
+ zh_Hans: "为指定 URL 上的所有图像添加标题,为没有标题的图像添加“Image [idx]: [caption]”作为 alt 标签,以支持下游模型的图像交互。"
pt_BR: "Captions all images at the specified URL, adding 'Image [idx]: [caption]' as an alt tag for those without one. This allows downstream LLMs to interact with the images in activities such as reasoning and summarizing."
llm_description: Captions all images at the specified URL
form: form
@@ -46,8 +49,8 @@ parameters:
pt_BR: Coletar todos os links ao final
human_description:
en_US: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
- zh_Hans: 最后会创建一个“按钮和链接”部分。这可以帮助下游 LLM 或 Web 代理浏览页面或采取进一步的行动。
- pt_BR: A "Buttons & Links" section will be created at the end. This helps the downstream LLMs or web agents navigating the page or take further actions.
+ zh_Hans: 末尾将添加“按钮和链接”部分,汇总页面上的所有链接。方便下游模型或网络代理做页面导航或执行进一步操作。
+ pt_BR: Um "Botão & Links" seção será criada no final. Isso ajuda os LLMs ou agentes da web navegando pela página ou executar ações adicionais.
llm_description: Gather all links at the end
form: form
- name: gather_all_images_at_the_end
@@ -60,8 +63,8 @@ parameters:
pt_BR: Coletar todas as imagens ao final
human_description:
en_US: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
- zh_Hans: 最后会创建一个“图像”部分。这可以让下游的 LLM 概览页面上的所有视觉效果,从而提高推理能力。
- pt_BR: An "Images" section will be created at the end. This gives the downstream LLMs an overview of all visuals on the page, which may improve reasoning.
+ zh_Hans: 末尾会新增“图片”部分,汇总页面上的所有图片。方便下游模型概览页面的视觉内容,提升推理效果。
+ pt_BR: Um "Imagens" seção será criada no final. Isso fornece uma visão geral de todas as imagens na página para os LLMs, que pode melhorar a razão.
llm_description: Gather all images at the end
form: form
- name: proxy_server
@@ -74,7 +77,7 @@ parameters:
human_description:
en_US: Use proxy to access URLs
zh_Hans: 利用代理访问 URL
- pt_BR: Use proxy to access URLs
+ pt_BR: Usar proxy para acessar URLs
llm_description: Use proxy to access URLs
form: form
- name: no_cache
@@ -83,7 +86,7 @@ parameters:
default: false
label:
en_US: Bypass the Cache
- zh_Hans: 绕过缓存
+ zh_Hans: 是否绕过缓存
pt_BR: Ignorar o cache
human_description:
en_US: Bypass the Cache
diff --git a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
index 62a5c7e7ba..74885cdf9a 100644
--- a/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
+++ b/api/core/tools/provider/builtin/jina/tools/jina_tokenizer.yaml
@@ -2,11 +2,14 @@ identity:
name: jina_tokenizer
author: hjlarry
label:
- en_US: JinaTokenizer
+ en_US: Segment
+ zh_Hans: 切分器
+ pt_BR: Segment
description:
human:
- en_US: Free API to tokenize text and segment long text into chunks.
- zh_Hans: 免费的API可以将文本tokenize,也可以将长文本分割成多个部分。
+ en_US: Split long text into chunks and tokenize them.
+ zh_Hans: 将长文本拆分成小段落,并做分词处理。
+ pt_BR: Dividir o texto longo em pedaços e fazer tokenização.
llm: Free API to tokenize text and segment long text into chunks.
parameters:
- name: content
@@ -15,6 +18,7 @@ parameters:
label:
en_US: Content
zh_Hans: 内容
+ pt_BR: Conteúdo
llm_description: the content which need to tokenize or segment
form: llm
- name: return_tokens
@@ -23,18 +27,22 @@ parameters:
label:
en_US: Return the tokens
zh_Hans: 是否返回tokens
+ pt_BR: Retornar os tokens
human_description:
en_US: Return the tokens and their corresponding ids in the response.
zh_Hans: 返回tokens及其对应的ids。
+ pt_BR: Retornar os tokens e seus respectivos ids na resposta.
form: form
- name: return_chunks
type: boolean
label:
en_US: Return the chunks
zh_Hans: 是否分块
+ pt_BR: Retornar os chunks
human_description:
en_US: Chunking the input into semantically meaningful segments while handling a wide variety of text types and edge cases based on common structural cues.
- zh_Hans: 将输入分块为具有语义意义的片段,同时根据常见的结构线索处理各种文本类型和边缘情况。
+ zh_Hans: 将输入文本分块为语义有意义的片段,同时基于常见的结构线索处理各种文本类型和特殊情况。
+ pt_BR: Dividir o texto de entrada em segmentos semanticamente significativos, enquanto lida com uma ampla variedade de tipos de texto e casos de borda com base em pistas estruturais comuns.
form: form
- name: tokenizer
type: select
diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.py b/api/core/tools/provider/builtin/stepfun/stepfun.py
index b24f730c95..239db85b11 100644
--- a/api/core/tools/provider/builtin/stepfun/stepfun.py
+++ b/api/core/tools/provider/builtin/stepfun/stepfun.py
@@ -16,7 +16,7 @@ class StepfunProvider(BuiltinToolProviderController):
user_id="",
tool_parameters={
"prompt": "cute girl, blue eyes, white hair, anime style",
- "size": "1024x1024",
+ "size": "256x256",
"n": 1,
},
)
diff --git a/api/core/tools/provider/builtin/stepfun/stepfun.yaml b/api/core/tools/provider/builtin/stepfun/stepfun.yaml
index 1f841ec369..e8139a4d7d 100644
--- a/api/core/tools/provider/builtin/stepfun/stepfun.yaml
+++ b/api/core/tools/provider/builtin/stepfun/stepfun.yaml
@@ -4,11 +4,9 @@ identity:
label:
en_US: Image-1X
zh_Hans: 阶跃星辰绘画
- pt_BR: Image-1X
description:
en_US: Image-1X
zh_Hans: 阶跃星辰绘画
- pt_BR: Image-1X
icon: icon.png
tags:
- image
@@ -20,27 +18,16 @@ credentials_for_provider:
label:
en_US: Stepfun API key
zh_Hans: 阶跃星辰API key
- pt_BR: Stepfun API key
- help:
- en_US: Please input your stepfun API key
- zh_Hans: 请输入你的阶跃星辰 API key
- pt_BR: Please input your stepfun API key
placeholder:
- en_US: Please input your stepfun API key
+ en_US: Please input your Stepfun API key
zh_Hans: 请输入你的阶跃星辰 API key
- pt_BR: Please input your stepfun API key
+ url: https://platform.stepfun.com/interface-key
stepfun_base_url:
type: text-input
required: false
label:
en_US: Stepfun base URL
zh_Hans: 阶跃星辰 base URL
- pt_BR: Stepfun base URL
- help:
- en_US: Please input your Stepfun base URL
- zh_Hans: 请输入你的阶跃星辰 base URL
- pt_BR: Please input your Stepfun base URL
placeholder:
en_US: Please input your Stepfun base URL
zh_Hans: 请输入你的阶跃星辰 base URL
- pt_BR: Please input your Stepfun base URL
diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.py b/api/core/tools/provider/builtin/stepfun/tools/image.py
index 0b92b122bf..eb55dae518 100644
--- a/api/core/tools/provider/builtin/stepfun/tools/image.py
+++ b/api/core/tools/provider/builtin/stepfun/tools/image.py
@@ -1,4 +1,3 @@
-import random
from typing import Any, Union
from openai import OpenAI
@@ -19,7 +18,7 @@ class StepfunTool(BuiltinTool):
"""
invoke tools
"""
- base_url = self.runtime.credentials.get("stepfun_base_url", "https://api.stepfun.com")
+ base_url = self.runtime.credentials.get("stepfun_base_url") or "https://api.stepfun.com"
base_url = str(URL(base_url) / "v1")
client = OpenAI(
@@ -28,9 +27,7 @@ class StepfunTool(BuiltinTool):
)
extra_body = {}
- model = tool_parameters.get("model", "step-1x-medium")
- if not model:
- return self.create_text_message("Please input model name")
+ model = "step-1x-medium"
# prompt
prompt = tool_parameters.get("prompt", "")
if not prompt:
@@ -67,9 +64,3 @@ class StepfunTool(BuiltinTool):
)
)
return result
-
- @staticmethod
- def _generate_random_id(length=8):
- characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
- random_id = "".join(random.choices(characters, k=length))
- return random_id
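The base-URL change above is subtle but real: `dict.get(key, default)` only falls back when the key is missing, so a credential saved as an empty string sailed through and produced a broken endpoint. The `or` form also covers empty strings and `None`:

```python
creds = {"stepfun_base_url": ""}  # optional field present but left blank

creds.get("stepfun_base_url", "https://api.stepfun.com")    # -> "" (default unused)
creds.get("stepfun_base_url") or "https://api.stepfun.com"  # -> "https://api.stepfun.com"
```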
diff --git a/api/core/tools/provider/builtin/stepfun/tools/image.yaml b/api/core/tools/provider/builtin/stepfun/tools/image.yaml
index dcc5bd2db2..8d7c9b6586 100644
--- a/api/core/tools/provider/builtin/stepfun/tools/image.yaml
+++ b/api/core/tools/provider/builtin/stepfun/tools/image.yaml
@@ -29,35 +29,6 @@ parameters:
pt_BR: Image prompt, you can check the official documentation of step-1x
llm_description: Image prompt for step-1x; describe the image you want to generate as a detailed list of words
form: llm
- - name: model
- type: select
- required: false
- human_description:
- en_US: used for selecting the model name
- zh_Hans: 用于选择模型的名字
- pt_BR: used for selecting the model name
- label:
- en_US: Model Name
- zh_Hans: 模型名字
- pt_BR: Model Name
- form: form
- options:
- - value: step-1x-turbo
- label:
- en_US: turbo
- zh_Hans: turbo
- pt_BR: turbo
- - value: step-1x-medium
- label:
- en_US: medium
- zh_Hans: medium
- pt_BR: medium
- - value: step-1x-large
- label:
- en_US: large
- zh_Hans: large
- pt_BR: large
- default: step-1x-medium
- name: size
type: select
required: false
diff --git a/api/core/tools/provider/builtin/tavily/tavily.yaml b/api/core/tools/provider/builtin/tavily/tavily.yaml
index 7b25a81848..95820f4d18 100644
--- a/api/core/tools/provider/builtin/tavily/tavily.yaml
+++ b/api/core/tools/provider/builtin/tavily/tavily.yaml
@@ -28,4 +28,4 @@ credentials_for_provider:
en_US: Get your Tavily API key from Tavily
zh_Hans: 从 TavilyApi 获取您的 Tavily API key
pt_BR: Get your Tavily API key from Tavily
- url: https://docs.tavily.com/docs/tavily-api/introduction
+ url: https://docs.tavily.com/docs/welcome
diff --git a/api/core/tools/provider/builtin/youtube/youtube.py b/api/core/tools/provider/builtin/youtube/youtube.py
index aad876491c..07e430bcbf 100644
--- a/api/core/tools/provider/builtin/youtube/youtube.py
+++ b/api/core/tools/provider/builtin/youtube/youtube.py
@@ -13,7 +13,7 @@ class YahooFinanceProvider(BuiltinToolProviderController):
).invoke(
user_id="",
tool_parameters={
- "channel": "TOKYO GIRLS COLLECTION",
+ "channel": "UC2JZCsZSOudXA08cMMRCL9g",
"start_date": "2020-01-01",
"end_date": "2024-12-31",
},
diff --git a/api/core/tools/provider/tool_provider.py b/api/core/tools/provider/tool_provider.py
index 05c88b904e..321b212014 100644
--- a/api/core/tools/provider/tool_provider.py
+++ b/api/core/tools/provider/tool_provider.py
@@ -153,6 +153,9 @@ class ToolProviderController(BaseModel, ABC):
# check type
credential_schema = credentials_need_to_validate[credential_name]
+ if not credential_schema.required and credentials[credential_name] is None:
+ continue
+
if credential_schema.type in {
ToolProviderCredentials.CredentialsType.SECRET_INPUT,
ToolProviderCredentials.CredentialsType.TEXT_INPUT,
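The new guard makes optional credentials genuinely optional: a non-required field that arrives as `None` is skipped before the type checks run. A sketch of the effect (the credential names here are illustrative):

```python
credentials = {"api_key": "sk-...", "optional_base_url": None}
# before: the SECRET_INPUT/TEXT_INPUT type check ran against None and failed
# after: `continue` skips the unset optional field; only "api_key" is validated
```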
diff --git a/api/poetry.lock b/api/poetry.lock
index 184cdb9e81..bce21fb547 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -2333,13 +2333,13 @@ develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk",
[[package]]
name = "elasticsearch"
-version = "8.14.0"
+version = "8.15.1"
description = "Python client for Elasticsearch"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
files = [
- {file = "elasticsearch-8.14.0-py3-none-any.whl", hash = "sha256:cef8ef70a81af027f3da74a4f7d9296b390c636903088439087b8262a468c130"},
- {file = "elasticsearch-8.14.0.tar.gz", hash = "sha256:aa2490029dd96f4015b333c1827aa21fd6c0a4d223b00dfb0fe933b8d09a511b"},
+ {file = "elasticsearch-8.15.1-py3-none-any.whl", hash = "sha256:02a0476e98768a30d7926335fc0d305c04fdb928eea1354c6e6040d8c2814569"},
+ {file = "elasticsearch-8.15.1.tar.gz", hash = "sha256:40c0d312f8adf8bdc81795bc16a0b546ddf544cb1f90e829a244e4780c4dbfd8"},
]
[package.dependencies]
@@ -2347,7 +2347,10 @@ elastic-transport = ">=8.13,<9"
[package.extras]
async = ["aiohttp (>=3,<4)"]
+dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "nox", "numpy", "orjson", "pandas", "pyarrow", "pytest", "pytest-asyncio", "pytest-cov", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "twine", "unasync"]
+docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"]
orjson = ["orjson (>=3)"]
+pyarrow = ["pyarrow (>=1)"]
requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"]
vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"]
@@ -10498,4 +10501,4 @@ cffi = ["cffi (>=1.11)"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
-content-hash = "17c4108d92c415d987f8b437ea3e0484c5601a05bfe175339a8546c93c159bc5"
+content-hash = "69b42bb1ff033f14e199fee8335356275099421d72bbd7037b7a991ea65cae08"
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 9e38c09456..f004865d5f 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -253,7 +253,7 @@ alibabacloud_gpdb20160503 = "~3.8.0"
alibabacloud_tea_openapi = "~0.3.9"
chromadb = "0.5.1"
clickhouse-connect = "~0.7.16"
-elasticsearch = "8.14.0"
+elasticsearch = "~8.15.1"
oracledb = "~2.2.1"
pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] }
pgvector = "0.2.5"
diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py
new file mode 100644
index 0000000000..7bf723b3a9
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/fireworks/test_text_embedding.py
@@ -0,0 +1,54 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.fireworks.text_embedding.text_embedding import FireworksTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True)
+def test_validate_credentials(setup_openai_mock):
+ model = FireworksTextEmbeddingModel()
+
+ with pytest.raises(CredentialsValidateFailedError):
+ model.validate_credentials(
+ model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": "invalid_key"}
+ )
+
+ model.validate_credentials(
+ model="nomic-ai/nomic-embed-text-v1.5", credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}
+ )
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["text_embedding"]], indirect=True)
+def test_invoke_model(setup_openai_mock):
+ model = FireworksTextEmbeddingModel()
+
+ result = model.invoke(
+ model="nomic-ai/nomic-embed-text-v1.5",
+ credentials={
+ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"),
+ },
+ texts=["hello", "world", " ".join(["long_text"] * 100), " ".join(["another_long_text"] * 100)],
+ user="foo",
+ )
+
+ assert isinstance(result, TextEmbeddingResult)
+ assert len(result.embeddings) == 4
+ assert result.usage.total_tokens == 2
+
+
+def test_get_num_tokens():
+ model = FireworksTextEmbeddingModel()
+
+ num_tokens = model.get_num_tokens(
+ model="nomic-ai/nomic-embed-text-v1.5",
+ credentials={
+ "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY"),
+ },
+ texts=["hello", "world"],
+ )
+
+ assert num_tokens == 2
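Note that `setup_openai_mock` patches the OpenAI-compatible client, so assertions such as `usage.total_tokens == 2` check the fixture's canned response rather than live Fireworks accounting. Run against the real API (requires `FIREWORKS_API_KEY`; sketch only), the call shape is the same:

```python
import os

from core.model_runtime.model_providers.fireworks.text_embedding.text_embedding import (
    FireworksTextEmbeddingModel,
)

model = FireworksTextEmbeddingModel()
result = model.invoke(
    model="nomic-ai/nomic-embed-text-v1.5",
    credentials={"fireworks_api_key": os.environ["FIREWORKS_API_KEY"]},
    texts=["hello", "world"],
    user="foo",
)
print(len(result.embeddings))  # one embedding per input text
```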
diff --git a/docker/.env.example b/docker/.env.example
index 7eaaceb928..d43c3edc7e 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -346,7 +346,7 @@ VOLCENGINE_TOS_REGION=your-region
# ------------------------------
# The type of vector store to use.
-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`.
+# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`.
VECTOR_STORE=weaviate
# The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
@@ -385,13 +385,30 @@ MYSCALE_PASSWORD=
MYSCALE_DATABASE=dify
MYSCALE_FTS_PARAMS=
-# pgvector configurations, only available when VECTOR_STORE is `pgvecto-rs or pgvector`
+# pgvector configurations, only available when VECTOR_STORE is `pgvector`
PGVECTOR_HOST=pgvector
PGVECTOR_PORT=5432
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=difyai123456
PGVECTOR_DATABASE=dify
+# pgvecto-rs configurations, only available when VECTOR_STORE is `pgvecto-rs`
+PGVECTO_RS_HOST=pgvecto-rs
+PGVECTO_RS_PORT=5432
+PGVECTO_RS_USER=postgres
+PGVECTO_RS_PASSWORD=difyai123456
+PGVECTO_RS_DATABASE=dify
+
+# analyticdb configurations, only available when VECTOR_STORE is `analyticdb`
+ANALYTICDB_KEY_ID=your-ak
+ANALYTICDB_KEY_SECRET=your-sk
+ANALYTICDB_REGION_ID=cn-hangzhou
+ANALYTICDB_INSTANCE_ID=gp-ab123456
+ANALYTICDB_ACCOUNT=testaccount
+ANALYTICDB_PASSWORD=testpassword
+ANALYTICDB_NAMESPACE=dify
+ANALYTICDB_NAMESPACE_PASSWORD=difypassword
+
# TiDB vector configurations, only available when VECTOR_STORE is `tidb`
TIDB_VECTOR_HOST=tidb
TIDB_VECTOR_PORT=4000
@@ -568,6 +585,10 @@ WORKFLOW_MAX_EXECUTION_STEPS=500
WORKFLOW_MAX_EXECUTION_TIME=1200
WORKFLOW_CALL_MAX_DEPTH=5
+# HTTP request node in workflow configuration
+HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
+HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
+
# SSRF Proxy server HTTP URL
SSRF_PROXY_HTTP_URL=http://ssrf_proxy:3128
# SSRF Proxy server HTTPS URL
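The two new HTTP request node limits are raw byte counts:

```python
10 * 1024 * 1024  # 10485760 -> 10 MiB cap for binary payloads
1 * 1024 * 1024   # 1048576  ->  1 MiB cap for text payloads
```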
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 16bef279bc..95e271a0e9 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -207,6 +207,8 @@ x-shared-env: &shared-api-worker-env
WORKFLOW_CALL_MAX_DEPTH: ${WORKFLOW_CALL_MAX_DEPTH:-5}
SSRF_PROXY_HTTP_URL: ${SSRF_PROXY_HTTP_URL:-http://ssrf_proxy:3128}
SSRF_PROXY_HTTPS_URL: ${SSRF_PROXY_HTTPS_URL:-http://ssrf_proxy:3128}
+ HTTP_REQUEST_NODE_MAX_BINARY_SIZE: ${HTTP_REQUEST_NODE_MAX_BINARY_SIZE:-10485760}
+ HTTP_REQUEST_NODE_MAX_TEXT_SIZE: ${HTTP_REQUEST_NODE_MAX_TEXT_SIZE:-1048576}
services:
# API service
@@ -628,7 +630,7 @@ services:
# https://www.elastic.co/guide/en/elasticsearch/reference/current/settings.html
# https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#docker-prod-prerequisites
elasticsearch:
- image: docker.elastic.co/elasticsearch/elasticsearch:8.14.3
+ image: docker.elastic.co/elasticsearch/elasticsearch:8.15.1
container_name: elasticsearch
profiles:
- elasticsearch
@@ -655,7 +657,7 @@ services:
# https://www.elastic.co/guide/en/kibana/current/docker.html
# https://www.elastic.co/guide/en/kibana/current/settings.html
kibana:
- image: docker.elastic.co/kibana/kibana:8.14.3
+ image: docker.elastic.co/kibana/kibana:8.15.1
container_name: kibana
profiles:
- elasticsearch
diff --git a/sdks/python-client/dify_client/client.py b/sdks/python-client/dify_client/client.py
index 2be079bdf3..5e42507a42 100644
--- a/sdks/python-client/dify_client/client.py
+++ b/sdks/python-client/dify_client/client.py
@@ -1,103 +1,80 @@
import json
+
import requests
class DifyClient:
- def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1'):
+ def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1"):
self.api_key = api_key
self.base_url = base_url
def _send_request(self, method, endpoint, json=None, params=None, stream=False):
- headers = {
- "Authorization": f"Bearer {self.api_key}",
- "Content-Type": "application/json"
- }
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
url = f"{self.base_url}{endpoint}"
response = requests.request(method, url, json=json, params=params, headers=headers, stream=stream)
return response
-
def _send_request_with_files(self, method, endpoint, data, files):
- headers = {
- "Authorization": f"Bearer {self.api_key}"
- }
+ headers = {"Authorization": f"Bearer {self.api_key}"}
url = f"{self.base_url}{endpoint}"
response = requests.request(method, url, data=data, headers=headers, files=files)
return response
-
+
def message_feedback(self, message_id, rating, user):
- data = {
- "rating": rating,
- "user": user
- }
+ data = {"rating": rating, "user": user}
return self._send_request("POST", f"/messages/{message_id}/feedbacks", data)
-
+
def get_application_parameters(self, user):
params = {"user": user}
return self._send_request("GET", "/parameters", params=params)
-
+
def file_upload(self, user, files):
- data = {
- "user": user
- }
+ data = {"user": user}
return self._send_request_with_files("POST", "/files/upload", data=data, files=files)
- def text_to_audio(self, text:str, user:str, streaming:bool=False):
- data = {
- "text": text,
- "user": user,
- "streaming": streaming
- }
+ def text_to_audio(self, text: str, user: str, streaming: bool = False):
+ data = {"text": text, "user": user, "streaming": streaming}
return self._send_request("POST", "/text-to-audio", data=data)
-
- def get_meta(self,user):
- params = { "user": user}
- return self._send_request("GET", f"/meta", params=params)
+
+ def get_meta(self, user):
+ params = {"user": user}
+ return self._send_request("GET", "/meta", params=params)
class CompletionClient(DifyClient):
def create_completion_message(self, inputs, response_mode, user, files=None):
- data = {
- "inputs": inputs,
- "response_mode": response_mode,
- "user": user,
- "files": files
- }
- return self._send_request("POST", "/completion-messages", data,
- stream=True if response_mode == "streaming" else False)
+ data = {"inputs": inputs, "response_mode": response_mode, "user": user, "files": files}
+ return self._send_request(
+ "POST", "/completion-messages", data, stream=True if response_mode == "streaming" else False
+ )
class ChatClient(DifyClient):
def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None, files=None):
- data = {
- "inputs": inputs,
- "query": query,
- "user": user,
- "response_mode": response_mode,
- "files": files
- }
+ data = {"inputs": inputs, "query": query, "user": user, "response_mode": response_mode, "files": files}
if conversation_id:
data["conversation_id"] = conversation_id
- return self._send_request("POST", "/chat-messages", data,
- stream=True if response_mode == "streaming" else False)
-
- def get_suggested(self, message_id, user:str):
+ return self._send_request(
+ "POST", "/chat-messages", data, stream=True if response_mode == "streaming" else False
+ )
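
`create_chat_message` only adds `conversation_id` to the payload when one is supplied, so multi-turn usage reuses the id returned by the first call. A sketch, assuming the blocking response body carries `conversation_id` and `answer` fields as the Dify chat API documents:

```python
from dify_client.client import ChatClient

chat = ChatClient(api_key="your-api-key")  # placeholder key

first = chat.create_chat_message(
    inputs={}, query="What can you do?", user="user-123", response_mode="blocking"
)
conversation_id = first.json()["conversation_id"]

# Pass the id back so the server keeps the conversation context.
followup = chat.create_chat_message(
    inputs={}, query="Give me an example.", user="user-123",
    response_mode="blocking", conversation_id=conversation_id,
)
print(followup.json()["answer"])
```
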
+
+ def get_suggested(self, message_id, user: str):
params = {"user": user}
return self._send_request("GET", f"/messages/{message_id}/suggested", params=params)
-
+
def stop_message(self, task_id, user):
data = {"user": user}
- return self._send_request("POST", f"/chat-messages/{task_id}/stop", data)
+ return self._send_request("POST", f"/chat-messages/{task_id}/stop", data)
def get_conversations(self, user, last_id=None, limit=None, pinned=None):
params = {"user": user, "last_id": last_id, "limit": limit, "pinned": pinned}
return self._send_request("GET", "/conversations", params=params)
-
+
def get_conversation_messages(self, user, conversation_id=None, first_id=None, limit=None):
params = {"user": user}
@@ -109,15 +86,15 @@ class ChatClient(DifyClient):
params["limit"] = limit
return self._send_request("GET", "/messages", params=params)
-
- def rename_conversation(self, conversation_id, name,auto_generate:bool, user:str):
- data = {"name": name, "auto_generate": auto_generate,"user": user}
+
+ def rename_conversation(self, conversation_id, name, auto_generate: bool, user: str):
+ data = {"name": name, "auto_generate": auto_generate, "user": user}
return self._send_request("POST", f"/conversations/{conversation_id}/name", data)
def delete_conversation(self, conversation_id, user):
data = {"user": user}
return self._send_request("DELETE", f"/conversations/{conversation_id}", data)
-
+
def audio_to_text(self, audio_file, user):
data = {"user": user}
files = {"audio_file": audio_file}
@@ -125,10 +102,10 @@ class ChatClient(DifyClient):
class WorkflowClient(DifyClient):
- def run(self, inputs:dict, response_mode:str="streaming", user:str="abc-123"):
+ def run(self, inputs: dict, response_mode: str = "streaming", user: str = "abc-123"):
data = {"inputs": inputs, "response_mode": response_mode, "user": user}
return self._send_request("POST", "/workflows/run", data)
-
+
def stop(self, task_id, user):
data = {"user": user}
return self._send_request("POST", f"/workflows/tasks/{task_id}/stop", data)
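
Note that unlike the chat and completion helpers, `run` never passes `stream=True`, even though its `response_mode` defaults to `"streaming"`, so `requests` buffers the whole body before returning. A blocking-mode sketch (key and inputs are placeholders):

```python
from dify_client.client import WorkflowClient

wf = WorkflowClient(api_key="your-api-key")  # placeholder key

# Override the "streaming" default to get a single buffered JSON response.
result = wf.run(inputs={"topic": "release notes"}, response_mode="blocking", user="user-123")
print(result.json())
```
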
@@ -137,10 +114,8 @@ class WorkflowClient(DifyClient):
return self._send_request("GET", f"/workflows/run/{workflow_run_id}")
-
class KnowledgeBaseClient(DifyClient):
-
- def __init__(self, api_key, base_url: str = 'https://api.dify.ai/v1', dataset_id: str = None):
+ def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1", dataset_id: str = None):
"""
Construct a KnowledgeBaseClient object.
@@ -150,10 +125,7 @@ class KnowledgeBaseClient(DifyClient):
            dataset_id (str, optional): ID of the dataset. Defaults to None. You don't need this if you only
                want to create a new dataset or list datasets; otherwise it must be set.
"""
- super().__init__(
- api_key=api_key,
- base_url=base_url
- )
+ super().__init__(api_key=api_key, base_url=base_url)
self.dataset_id = dataset_id
def _get_dataset_id(self):
@@ -162,10 +134,10 @@ class KnowledgeBaseClient(DifyClient):
return self.dataset_id
def create_dataset(self, name: str, **kwargs):
- return self._send_request('POST', '/datasets', {'name': name}, **kwargs)
+ return self._send_request("POST", "/datasets", {"name": name}, **kwargs)
def list_datasets(self, page: int = 1, page_size: int = 20, **kwargs):
- return self._send_request('GET', f'/datasets?page={page}&limit={page_size}', **kwargs)
+ return self._send_request("GET", f"/datasets?page={page}&limit={page_size}", **kwargs)
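
The dataset-scoped methods resolve their target through `_get_dataset_id`, so a common pattern is to create the dataset first and re-bind the client. A sketch, assuming the create response echoes the new dataset's `id`:

```python
from dify_client.client import KnowledgeBaseClient

kb = KnowledgeBaseClient(api_key="your-api-key")  # placeholder key

created = kb.create_dataset(name="support-articles")
dataset_id = created.json()["id"]  # assumed response field

# Re-bind so the document-level helpers below have a dataset to target.
kb = KnowledgeBaseClient(api_key="your-api-key", dataset_id=dataset_id)
print(kb.list_datasets(page=1, page_size=20).json())
```
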
def create_document_by_text(self, name, text, extra_params: dict = None, **kwargs):
"""
@@ -193,14 +165,7 @@ class KnowledgeBaseClient(DifyClient):
}
:return: Response from the API
"""
- data = {
- 'indexing_technique': 'high_quality',
- 'process_rule': {
- 'mode': 'automatic'
- },
- 'name': name,
- 'text': text
- }
+ data = {"indexing_technique": "high_quality", "process_rule": {"mode": "automatic"}, "name": name, "text": text}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
url = f"/datasets/{self._get_dataset_id()}/document/create_by_text"
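
Because `extra_params` is merged into the payload with `dict.update`, either default (`indexing_technique` or `process_rule`) can be overridden per call. Continuing the `kb` sketch from above:

```python
# "economy" replaces the "high_quality" default because extra_params
# is merged over the defaults just before the request is sent.
doc = kb.create_document_by_text(
    name="faq",
    text="Q: How do I reset my password?\nA: Use the account settings page.",
    extra_params={"indexing_technique": "economy"},
)
print(doc.json())
```
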
@@ -233,10 +198,7 @@ class KnowledgeBaseClient(DifyClient):
}
:return: Response from the API
"""
- data = {
- 'name': name,
- 'text': text
- }
+ data = {"name": name, "text": text}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/update_by_text"
@@ -269,16 +231,11 @@ class KnowledgeBaseClient(DifyClient):
:return: Response from the API
"""
files = {"file": open(file_path, "rb")}
- data = {
- 'process_rule': {
- 'mode': 'automatic'
- },
- 'indexing_technique': 'high_quality'
- }
+ data = {"process_rule": {"mode": "automatic"}, "indexing_technique": "high_quality"}
if extra_params is not None and isinstance(extra_params, dict):
data.update(extra_params)
if original_document_id is not None:
- data['original_document_id'] = original_document_id
+ data["original_document_id"] = original_document_id
url = f"/datasets/{self._get_dataset_id()}/document/create_by_file"
return self._send_request_with_files("POST", url, {"data": json.dumps(data)}, files)
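
One thing this reformat leaves alone: `open(file_path, "rb")` is never closed, so the handle lingers until garbage collection. A hypothetical wrapper (not part of the SDK; it reaches into the client's private helpers purely for illustration) that closes the file once the upload returns:

```python
import json

def upload_document_closing(kb, file_path, extra_params=None):
    """Same request as create_document_by_file, but the file handle is
    closed deterministically when the upload completes."""
    data = {"process_rule": {"mode": "automatic"}, "indexing_technique": "high_quality"}
    if extra_params:
        data.update(extra_params)
    url = f"/datasets/{kb._get_dataset_id()}/document/create_by_file"
    with open(file_path, "rb") as f:
        return kb._send_request_with_files("POST", url, {"data": json.dumps(data)}, {"file": f})
```
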
@@ -352,11 +309,11 @@ class KnowledgeBaseClient(DifyClient):
"""
params = {}
if page is not None:
- params['page'] = page
+ params["page"] = page
if page_size is not None:
- params['limit'] = page_size
+ params["limit"] = page_size
if keyword is not None:
- params['keyword'] = keyword
+ params["keyword"] = keyword
url = f"/datasets/{self._get_dataset_id()}/documents"
return self._send_request("GET", url, params=params, **kwargs)
@@ -383,9 +340,9 @@ class KnowledgeBaseClient(DifyClient):
url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments"
params = {}
if keyword is not None:
- params['keyword'] = keyword
+ params["keyword"] = keyword
if status is not None:
- params['status'] = status
+ params["status"] = status
if "params" in kwargs:
params.update(kwargs["params"])
return self._send_request("GET", url, params=params, **kwargs)
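
The segment query builds its filters the same way as document listing. Its `def` line falls outside this hunk; in the SDK this method is `query_segments(document_id, keyword=None, status=None, **kwargs)`, and the `"completed"` status value below is an assumption based on the API docs:

```python
# List only the segments of one document that have finished indexing.
segments = kb.query_segments(document_id="doc-123", status="completed")
print(segments.json())
```
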
diff --git a/web/app/activate/page.tsx b/web/app/activate/page.tsx
index 90874f50ce..0f18544335 100644
--- a/web/app/activate/page.tsx
+++ b/web/app/activate/page.tsx
@@ -22,7 +22,7 @@ const Activate = () => {