From 2721cb8dee8dca4211af8cf2db984e54e89cafb3 Mon Sep 17 00:00:00 2001 From: HJY <1398145450@qq.com> Date: Thu, 19 Sep 2024 10:39:27 +0800 Subject: [PATCH 01/40] feat: add format util unit and add pre-commit unit check (#8427) --- CONTRIBUTING_CN.md | 4 +-- web/.husky/pre-commit | 27 ++++++++++++++++++ web/README.md | 6 +++- web/utils/format.spec.ts | 61 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 web/utils/format.spec.ts diff --git a/CONTRIBUTING_CN.md b/CONTRIBUTING_CN.md index 7cd2bb60eb..310c55090a 100644 --- a/CONTRIBUTING_CN.md +++ b/CONTRIBUTING_CN.md @@ -36,7 +36,7 @@ | 被团队成员标记为高优先级的功能 | 高优先级 | | 在 [community feedback board](https://github.com/langgenius/dify/discussions/categories/feedbacks) 内反馈的常见功能请求 | 中等优先级 | | 非核心功能和小幅改进 | 低优先级 | - | 有价值当不紧急 | 未来功能 | + | 有价值但不紧急 | 未来功能 | ### 其他任何事情(例如 bug 报告、性能优化、拼写错误更正): * 立即开始编码。 @@ -138,7 +138,7 @@ Dify 的后端使用 Python 编写,使用 [Flask](https://flask.palletsproject ├── models // 描述数据模型和 API 响应的形状 ├── public // 如 favicon 等元资源 ├── service // 定义 API 操作的形状 -├── test +├── test ├── types // 函数参数和返回值的描述 └── utils // 共享的实用函数 ``` diff --git a/web/.husky/pre-commit b/web/.husky/pre-commit index 6df8b24b61..d9290e1853 100755 --- a/web/.husky/pre-commit +++ b/web/.husky/pre-commit @@ -51,5 +51,32 @@ if $web_modified; then echo "Running ESLint on web module" cd ./web || exit 1 npx lint-staged + + echo "Running unit tests check" + modified_files=$(git diff --cached --name-only -- utils | grep -v '\.spec\.ts$' || true) + + if [ -n "$modified_files" ]; then + for file in $modified_files; do + test_file="${file%.*}.spec.ts" + echo "Checking for test file: $test_file" + + # check if the test file exists + if [ -f "../$test_file" ]; then + echo "Detected changes in $file, running corresponding unit tests..." + npm run test "../$test_file" + + if [ $? -ne 0 ]; then + echo "Unit tests failed. Please fix the errors before committing." + exit 1 + fi + echo "Unit tests for $file passed." + else + echo "Warning: $file does not have a corresponding test file." + fi + + done + echo "All unit tests for modified web/utils files have passed." + fi + cd ../ fi diff --git a/web/README.md b/web/README.md index 867d822e27..a84ef21007 100644 --- a/web/README.md +++ b/web/README.md @@ -18,6 +18,10 @@ yarn install --frozen-lockfile Then, configure the environment variables. Create a file named `.env.local` in the current directory and copy the contents from `.env.example`. Modify the values of these environment variables according to your requirements: +```bash +cp .env.example .env.local +``` + ``` # For production release, change this to PRODUCTION NEXT_PUBLIC_DEPLOY_ENV=DEVELOPMENT @@ -78,7 +82,7 @@ If your IDE is VSCode, rename `web/.vscode/settings.example.json` to `web/.vscod We start to use [Jest](https://jestjs.io/) and [React Testing Library](https://testing-library.com/docs/react-testing-library/intro/) for Unit Testing. -You can create a test file with a suffix of `.spec` beside the file that to be tested. For example, if you want to test a file named `util.ts`. The test file name should be `util.spec.ts`. +You can create a test file with a suffix of `.spec` beside the file that to be tested. For example, if you want to test a file named `util.ts`. The test file name should be `util.spec.ts`. 
Run test: diff --git a/web/utils/format.spec.ts b/web/utils/format.spec.ts new file mode 100644 index 0000000000..f349efa4e4 --- /dev/null +++ b/web/utils/format.spec.ts @@ -0,0 +1,61 @@ +import { formatFileSize, formatNumber, formatTime } from './format' +describe('formatNumber', () => { + test('should correctly format integers', () => { + expect(formatNumber(1234567)).toBe('1,234,567') + }) + test('should correctly format decimals', () => { + expect(formatNumber(1234567.89)).toBe('1,234,567.89') + }) + test('should correctly handle string input', () => { + expect(formatNumber('1234567')).toBe('1,234,567') + }) + test('should correctly handle zero', () => { + expect(formatNumber(0)).toBe(0) + }) + test('should correctly handle negative numbers', () => { + expect(formatNumber(-1234567)).toBe('-1,234,567') + }) + test('should correctly handle empty input', () => { + expect(formatNumber('')).toBe('') + }) +}) +describe('formatFileSize', () => { + test('should return the input if it is falsy', () => { + expect(formatFileSize(0)).toBe(0) + }) + test('should format bytes correctly', () => { + expect(formatFileSize(500)).toBe('500.00B') + }) + test('should format kilobytes correctly', () => { + expect(formatFileSize(1500)).toBe('1.46KB') + }) + test('should format megabytes correctly', () => { + expect(formatFileSize(1500000)).toBe('1.43MB') + }) + test('should format gigabytes correctly', () => { + expect(formatFileSize(1500000000)).toBe('1.40GB') + }) + test('should format terabytes correctly', () => { + expect(formatFileSize(1500000000000)).toBe('1.36TB') + }) + test('should format petabytes correctly', () => { + expect(formatFileSize(1500000000000000)).toBe('1.33PB') + }) +}) +describe('formatTime', () => { + test('should return the input if it is falsy', () => { + expect(formatTime(0)).toBe(0) + }) + test('should format seconds correctly', () => { + expect(formatTime(30)).toBe('30.00 sec') + }) + test('should format minutes correctly', () => { + expect(formatTime(90)).toBe('1.50 min') + }) + test('should format hours correctly', () => { + expect(formatTime(3600)).toBe('1.00 h') + }) + test('should handle large numbers', () => { + expect(formatTime(7200)).toBe('2.00 h') + }) +}) From 54b9e1f6d120a92f2049d513eaa68270faff51a0 Mon Sep 17 00:00:00 2001 From: takatost Date: Thu, 19 Sep 2024 11:43:00 +0800 Subject: [PATCH 02/40] fix: ci issues(missing duckduckgo-search==6.2.11, ruff lint issue) (#8543) --- .../comfyui/tools/comfyui_stable_diffusion.py | 2 +- api/poetry.lock | 26 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py index b9b52c0b4d..81fc8cc985 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py @@ -290,7 +290,7 @@ class ComfyuiStableDiffusionTool(BuiltinTool): draw_options["6"]["inputs"]["text"] = prompt draw_options["7"]["inputs"]["text"] = negative_prompt # if the model is SD3 or FLUX series, the Latent class should be corresponding to SD3 Latent - if model_type in (ModelType.SD3.name, ModelType.FLUX.name): + if model_type in {ModelType.SD3.name, ModelType.FLUX.name}: draw_options["5"]["class_type"] = "EmptySD3LatentImage" if lora_list: diff --git a/api/poetry.lock b/api/poetry.lock index 191db600e4..28c688cc9c 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -2296,18 +2296,18 @@ 
files = [ [[package]] name = "duckduckgo-search" -version = "6.2.11" +version = "6.2.12" description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine." optional = false python-versions = ">=3.8" files = [ - {file = "duckduckgo_search-6.2.11-py3-none-any.whl", hash = "sha256:6fb7069b79e8928f487001de6859034ade19201bdcd257ec198802430e374bfe"}, - {file = "duckduckgo_search-6.2.11.tar.gz", hash = "sha256:6b6ef1b552c5e67f23e252025d2504caf6f9fc14f70e86c6dd512200f386c673"}, + {file = "duckduckgo_search-6.2.12-py3-none-any.whl", hash = "sha256:0d379c1f845b632a41553efb13d571788f19ad289229e641a27b5710d92097a6"}, + {file = "duckduckgo_search-6.2.12.tar.gz", hash = "sha256:04f9f1459763668d268344c7a32d943173d0e060dad53a5c2df4b4d3ca9a74cf"}, ] [package.dependencies] click = ">=8.1.7" -primp = ">=0.6.1" +primp = ">=0.6.2" [package.extras] dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"] @@ -6356,19 +6356,19 @@ dill = ["dill (>=0.3.8)"] [[package]] name = "primp" -version = "0.6.1" +version = "0.6.2" description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints" optional = false python-versions = ">=3.8" files = [ - {file = "primp-0.6.1-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:60cfe95e0bdf154b0f9036d38acaddc9aef02d6723ed125839b01449672d3946"}, - {file = "primp-0.6.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:e1e92433ecf32639f9e800bc3a5d58b03792bdec99421b7fb06500e2fae63c85"}, - {file = "primp-0.6.1-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e02353f13f07fb5a6f91df9e2f4d8ec9f41312de95088744dce1c9729a3865d"}, - {file = "primp-0.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:c5a2ccfdf488b17be225a529a31e2b22724b2e22fba8e1ae168a222f857c2dc0"}, - {file = "primp-0.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:f335c2ace907800a23bbb7bc6e15acc7fff659b86a2d5858817f6ed79cea07cf"}, - {file = "primp-0.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5dc15bd9d47ded7bc356fcb5d8321972dcbeba18e7d3b7250e12bb7365447b2b"}, - {file = "primp-0.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:eebf0412ebba4089547b16b97b765d83f69f1433d811bb02b02cdcdbca20f672"}, - {file = "primp-0.6.1.tar.gz", hash = "sha256:64b3c12e3d463a887518811c46f3ec37cca02e6af1ddf1287e548342de436301"}, + {file = "primp-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4a35d441462a55d9a9525bf170e2ffd2fcb3db6039b23e802859fa22c18cdd51"}, + {file = "primp-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:f67ccade95bdbca3cf9b96b93aa53f9617d85ddbf988da4e9c523aa785fd2d54"}, + {file = "primp-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8074b93befaf36567e4cf3d4a1a8cd6ab9cc6e4dd4ff710650678daa405aee71"}, + {file = "primp-0.6.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7d3e2a3f8c6262e9b883651b79c4ff2b7677a76f47293a139f541c9ea333ce3b"}, + {file = "primp-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:a460ea389371c6d04839b4b50b5805d99da8ebe281a2e8b534d27377c6d44f0e"}, + {file = "primp-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5b6b27e89d3c05c811aff0e4fde7a36d6957b15b3112f4ce28b6b99e8ca1e725"}, + {file = "primp-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:1006a40a85f88a4c5222094813a1ebc01f85a63e9a33d2c443288c0720bed321"}, + {file = "primp-0.6.2.tar.gz", hash = "sha256:5a96a6b65195a8a989157e67d23bd171c49be238654e02bdf1b1fda36cbcc068"}, ] [package.extras] 
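The Python change in PATCH 02/40 is a one-line switch from a tuple literal to a set literal in a membership test, which is the "ruff lint issue" the subject refers to (the lockfile hunks just bump duckduckgo-search and primp). A minimal sketch of the same pattern, using an illustrative `ModelType` enum rather than the real ComfyUI one:

```python
from enum import Enum


class ModelType(Enum):
    # Illustrative members only; the real enum lives in the comfyui tool provider.
    SD15 = "sd15"
    SD3 = "sd3"
    FLUX = "flux"


def needs_sd3_latent(model_type: str) -> bool:
    # Before the patch: `model_type in (ModelType.SD3.name, ModelType.FLUX.name)`,
    # i.e. membership against a tuple literal, which is what the ruff complaint targeted.
    # After the patch: a set literal states "one of a fixed collection of values" directly
    # and gives average O(1) lookups (with two items the gain is readability, not speed).
    return model_type in {ModelType.SD3.name, ModelType.FLUX.name}


if __name__ == "__main__":
    print(needs_sd3_latent("FLUX"))  # True
    print(needs_sd3_latent("SD15"))  # False
```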
From ffd2f61dd940d64974e457a6327cecb7bb602922 Mon Sep 17 00:00:00 2001 From: takatost Date: Thu, 19 Sep 2024 15:34:56 +0800 Subject: [PATCH 03/40] fix: thread_pool submit count in parallel workflow not releasing (#8549) --- .../workflow/graph_engine/graph_engine.py | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index 1db9b690ab..57e4f716fd 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -61,6 +61,9 @@ class GraphEngineThreadPool(ThreadPoolExecutor): return super().submit(fn, *args, **kwargs) + def task_done_callback(self, future): + self.submit_count -= 1 + def check_is_full(self) -> None: print(f"submit_count: {self.submit_count}, max_submit_count: {self.max_submit_count}") if self.submit_count > self.max_submit_count: @@ -426,20 +429,22 @@ class GraphEngine: ): continue - futures.append( - self.thread_pool.submit( - self._run_parallel_node, - **{ - "flask_app": current_app._get_current_object(), # type: ignore[attr-defined] - "q": q, - "parallel_id": parallel_id, - "parallel_start_node_id": edge.target_node_id, - "parent_parallel_id": in_parallel_id, - "parent_parallel_start_node_id": parallel_start_node_id, - }, - ) + future = self.thread_pool.submit( + self._run_parallel_node, + **{ + "flask_app": current_app._get_current_object(), # type: ignore[attr-defined] + "q": q, + "parallel_id": parallel_id, + "parallel_start_node_id": edge.target_node_id, + "parent_parallel_id": in_parallel_id, + "parent_parallel_start_node_id": parallel_start_node_id, + }, ) + future.add_done_callback(self.thread_pool.task_done_callback) + + futures.append(future) + succeeded_count = 0 while True: try: From d6de96c4b43d46ffcc341ec145406633de833c0c Mon Sep 17 00:00:00 2001 From: Su Yang Date: Thu, 19 Sep 2024 17:08:59 +0800 Subject: [PATCH 04/40] feat: sync Qwen API with Aliyun Bailian (#8538) --- .../tongyi/llm/qwen-max-0107.yaml | 81 +++++++++++++++++++ .../tongyi/llm/qwen-max-1201.yaml | 1 + .../tongyi/llm/qwen-plus-0206.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-plus-0624.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-plus-0723.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-plus-0806.yaml | 79 ++++++++++++++++++ .../model_providers/tongyi/llm/qwen-plus.yaml | 2 +- .../tongyi/llm/qwen-turbo-0206.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-turbo-0624.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-vl-max-0201.yaml | 47 +++++++++++ .../tongyi/llm/qwen-vl-max-0809.yaml | 47 +++++++++++ .../tongyi/llm/qwen-vl-max.yaml | 2 +- .../tongyi/llm/qwen-vl-plus-0809.yaml | 47 +++++++++++ .../tongyi/llm/qwen-vl-plus.yaml | 2 +- .../tongyi/llm/qwen2-math-1.5b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2-math-72b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2-math-7b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-0.5b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-1.5b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-14b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-32b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-3b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-72b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-7b-instruct.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen2.5-coder-7b-instruct.yaml | 79 ++++++++++++++++++ .../model_providers/tongyi/tongyi.yaml | 4 +- 26 files 
changed, 1571 insertions(+), 5 deletions(-) create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml new file mode 100644 index 0000000000..7c90afecf5 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml @@ -0,0 +1,81 @@ +model: qwen-max-0107 +label: + en_US: qwen-max-0107 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. 
+ - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.04' + output: '0.12' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml index 0368a4a01e..dc234783cd 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-1201.yaml @@ -79,3 +79,4 @@ pricing: output: '0.12' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml new file mode 100644 index 0000000000..7940be9e8b --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0206 +label: + en_US: qwen-plus-0206 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml new file mode 100644 index 0000000000..0e02526beb --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0624 +label: + en_US: qwen-plus-0624 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml new file mode 100644 index 0000000000..65175f1b10 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0806 +label: + en_US: qwen-plus-0806 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml new file mode 100644 index 0000000000..1c530dcba2 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0806 +label: + en_US: qwen-plus-0806 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index 4be78627f0..e78b77c7f2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -6,7 +6,7 @@ features: - agent-thought model_properties: mode: completion - context_size: 32768 + context_size: 131072 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml new file mode 100644 index 0000000000..2c9857cf9f --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-0206 +label: + en_US: qwen-turbo-0206 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 1500 + min: 1 + max: 1500 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml new file mode 100644 index 0000000000..7ea5afc795 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-0624 +label: + en_US: qwen-turbo-0624 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 1500 + min: 1 + max: 1500 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml new file mode 100644 index 0000000000..fffd732ca5 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml @@ -0,0 +1,47 @@ +model: qwen-vl-max-0201 +label: + en_US: qwen-vl-max-0201 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml new file mode 100644 index 0000000000..af8742b981 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml @@ -0,0 +1,47 @@ +model: qwen-vl-max-0809 +label: + en_US: qwen-vl-max-0809 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml index f917ccaa5d..a93d456428 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 8192 + context_size: 32768 parameter_rules: - name: top_p use_template: top_p diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml new file mode 100644 index 0000000000..12573511b9 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml @@ -0,0 +1,47 @@ +model: qwen-vl-plus-0809 +label: + en_US: qwen-vl-plus-0809 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.008' + output: '0.008' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml index e2dd8c4e57..13468c44ee 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 32768 + context_size: 8192 parameter_rules: - name: top_p use_template: top_p diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml new file mode 100644 index 0000000000..8b204ff1f0 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2-math-1.5b-instruct +label: + en_US: qwen2-math-1.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml new file mode 100644 index 0000000000..3875a274e7 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2-math-72b-instruct +label: + en_US: qwen2-math-72b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml new file mode 100644 index 0000000000..0920806845 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2-math-7b-instruct +label: + en_US: qwen2-math-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml new file mode 100644 index 0000000000..824954323b --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-0.5b-instruct +label: + en_US: qwen2.5-0.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml new file mode 100644 index 0000000000..c0a4b45be6 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-1.5b-instruct +label: + en_US: qwen2.5-1.5b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml new file mode 100644 index 0000000000..92b67804e8 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-14b-instruct +label: + en_US: qwen2.5-14b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml new file mode 100644 index 0000000000..960438e3e7 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-32b-instruct +label: + en_US: qwen2.5-32b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0035' + output: '0.007' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml new file mode 100644 index 0000000000..59a8827d9e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-3b-instruct +label: + en_US: qwen2.5-3b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.000' + output: '0.000' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml new file mode 100644 index 0000000000..f14ee2daff --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-72b-instruct +label: + en_US: qwen2.5-72b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml new file mode 100644 index 0000000000..8ea8166358 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-7b-instruct +label: + en_US: qwen2.5-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml new file mode 100644 index 0000000000..8ea8166358 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml @@ -0,0 +1,79 @@ +model: qwen2.5-7b-instruct +label: + en_US: qwen2.5-7b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.001' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml index b251391e34..de2c289c94 100644 --- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml +++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml @@ -11,9 +11,9 @@ background: "#EFF1FE" help: title: en_US: Get your API key from AliCloud - zh_Hans: 从阿里云获取 API Key + zh_Hans: 从阿里云百炼获取 API Key url: - en_US: https://dashscope.console.aliyun.com/api-key_management + en_US: https://bailian.console.aliyun.com/?apiKey=1#/api-key supported_model_types: - llm - tts From d96f5ba1cad744e2f6760d01dc5553946b416b1c Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:34:12 +0800 Subject: [PATCH 05/40] add storage error log (#8556) --- api/extensions/ext_sentry.py | 10 ++++++- api/extensions/ext_storage.py | 49 ++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/api/extensions/ext_sentry.py b/api/extensions/ext_sentry.py index c2dc736038..e255e7eb35 100644 --- a/api/extensions/ext_sentry.py +++ b/api/extensions/ext_sentry.py @@ -5,6 +5,8 @@ from sentry_sdk.integrations.celery import CeleryIntegration from sentry_sdk.integrations.flask import FlaskIntegration from werkzeug.exceptions import HTTPException +from core.model_runtime.errors.invoke import InvokeRateLimitError + def before_send(event, hint): if "exc_info" in hint: @@ -20,7 +22,13 @@ def init_app(app): sentry_sdk.init( dsn=app.config.get("SENTRY_DSN"), integrations=[FlaskIntegration(), CeleryIntegration()], - ignore_errors=[HTTPException, ValueError, openai.APIStatusError, parse_error.defaultErrorResponse], + ignore_errors=[ + HTTPException, + ValueError, + openai.APIStatusError, + InvokeRateLimitError, + parse_error.defaultErrorResponse, + ], traces_sample_rate=app.config.get("SENTRY_TRACES_SAMPLE_RATE", 1.0), profiles_sample_rate=app.config.get("SENTRY_PROFILES_SAMPLE_RATE", 1.0), environment=app.config.get("DEPLOY_ENV"), diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 5ce18b7292..1e6530f6f4 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -1,3 +1,4 @@ +import logging from collections.abc import Generator from typing import Union @@ -40,28 +41,56 @@ class Storage: self.storage_runner = LocalStorage(app=app) def save(self, filename, data): - self.storage_runner.save(filename, data) + try: + self.storage_runner.save(filename, data) + except Exception as e: + logging.exception("Failed to save file: %s", e) + raise e def load(self, filename: str, stream: bool = False) -> Union[bytes, Generator]: - if stream: - return self.load_stream(filename) - else: - return self.load_once(filename) + try: + if stream: + return self.load_stream(filename) + else: + return self.load_once(filename) + except Exception as e: + logging.exception("Failed to load file: %s", e) + raise e def load_once(self, filename: str) -> bytes: - return self.storage_runner.load_once(filename) + try: + return self.storage_runner.load_once(filename) + except Exception as e: + logging.exception("Failed to load_once file: %s", e) + raise e def load_stream(self, filename: str) -> Generator: - return self.storage_runner.load_stream(filename) + try: + return self.storage_runner.load_stream(filename) + except Exception as e: + 
logging.exception("Failed to load_stream file: %s", e) + raise e def download(self, filename, target_filepath): - self.storage_runner.download(filename, target_filepath) + try: + self.storage_runner.download(filename, target_filepath) + except Exception as e: + logging.exception("Failed to download file: %s", e) + raise e def exists(self, filename): - return self.storage_runner.exists(filename) + try: + return self.storage_runner.exists(filename) + except Exception as e: + logging.exception("Failed to check file exists: %s", e) + raise e def delete(self, filename): - return self.storage_runner.delete(filename) + try: + return self.storage_runner.delete(filename) + except Exception as e: + logging.exception("Failed to delete file: %s", e) + raise e storage = Storage() From 7411bcf1673c7bdd1654ce9c30bce37054f735ca Mon Sep 17 00:00:00 2001 From: Joel Date: Thu, 19 Sep 2024 17:40:20 +0800 Subject: [PATCH 06/40] chore: improve delimiter (#8552) --- .../datasets/create/step-two/escape.ts | 18 +++++++ .../datasets/create/step-two/index.tsx | 37 +++++++++---- .../datasets/create/step-two/unescape.ts | 54 +++++++++++++++++++ web/i18n/en-US/dataset-creation.ts | 3 +- web/i18n/zh-Hans/dataset-creation.ts | 3 +- 5 files changed, 103 insertions(+), 12 deletions(-) create mode 100644 web/app/components/datasets/create/step-two/escape.ts create mode 100644 web/app/components/datasets/create/step-two/unescape.ts diff --git a/web/app/components/datasets/create/step-two/escape.ts b/web/app/components/datasets/create/step-two/escape.ts new file mode 100644 index 0000000000..098f43bc7f --- /dev/null +++ b/web/app/components/datasets/create/step-two/escape.ts @@ -0,0 +1,18 @@ +function escape(input: string): string { + if (!input || typeof input !== 'string') + return '' + + const res = input + .replaceAll('\\', '\\\\') + .replaceAll('\0', '\\0') + .replaceAll('\b', '\\b') + .replaceAll('\f', '\\f') + .replaceAll('\n', '\\n') + .replaceAll('\r', '\\r') + .replaceAll('\t', '\\t') + .replaceAll('\v', '\\v') + .replaceAll('\'', '\\\'') + return res +} + +export default escape diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 15332b944d..94614918db 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,5 +1,5 @@ 'use client' -import React, { useEffect, useLayoutEffect, useRef, useState } from 'react' +import React, { useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import { useBoolean } from 'ahooks' @@ -13,6 +13,8 @@ import { groupBy } from 'lodash-es' import PreviewItem, { PreviewType } from './preview-item' import LanguageSelect from './language-select' import s from './index.module.css' +import unescape from './unescape' +import escape from './escape' import cn from '@/utils/classnames' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import { @@ -78,6 +80,8 @@ enum IndexingType { ECONOMICAL = 'economy', } +const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' + const StepTwo = ({ isSetting, documentDetail, @@ -110,8 +114,11 @@ const StepTwo = ({ const previewScrollRef = useRef(null) const [previewScrolled, setPreviewScrolled] = useState(false) 
const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) - const [segmentIdentifier, setSegmentIdentifier] = useState('\\n') - const [max, setMax] = useState(5000) // default chunk length + const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) + const setSegmentIdentifier = useCallback((value: string) => { + doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) + }, []) + const [max, setMax] = useState(4000) // default chunk length const [overlap, setOverlap] = useState(50) const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() @@ -183,7 +190,7 @@ const StepTwo = ({ } const resetRules = () => { if (defaultConfig) { - setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n') + setSegmentIdentifier(defaultConfig.segmentation.separator) setMax(defaultConfig.segmentation.max_tokens) setOverlap(defaultConfig.segmentation.chunk_overlap) setRules(defaultConfig.pre_processing_rules) @@ -217,7 +224,7 @@ const StepTwo = ({ const ruleObj = { pre_processing_rules: rules, segmentation: { - separator: segmentIdentifier === '\\n' ? '\n' : segmentIdentifier, + separator: unescape(segmentIdentifier), max_tokens: max, chunk_overlap: overlap, }, @@ -394,7 +401,7 @@ const StepTwo = ({ try { const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) const separator = res.rules.segmentation.separator - setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') + setSegmentIdentifier(separator) setMax(res.rules.segmentation.max_tokens) setOverlap(res.rules.segmentation.chunk_overlap) setRules(res.rules.pre_processing_rules) @@ -411,7 +418,7 @@ const StepTwo = ({ const separator = rules.segmentation.separator const max = rules.segmentation.max_tokens const overlap = rules.segmentation.chunk_overlap - setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') + setSegmentIdentifier(separator) setMax(max) setOverlap(overlap) setRules(rules.pre_processing_rules) @@ -616,12 +623,22 @@ const StepTwo = ({
-              <div className={s.label}>{t('datasetCreation.stepTwo.separator')}</div>
+              <div className={s.label}>
+                {t('datasetCreation.stepTwo.separator')}
+                <Tooltip
+                  popupContent={
+                    <div className='max-w-[200px]'>
+                      {t('datasetCreation.stepTwo.separatorTip')}
+                    </div>
+                  }
+                />
+              </div>
               <input
                 type="text"
                 className={s.input}
-                placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''}
-                value={segmentIdentifier}
-                onChange={e => setSegmentIdentifier(e.target.value)}
+                placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''}
+                value={segmentIdentifier}
+                onChange={e => doSetSegmentIdentifier(e.target.value)}
               />
diff --git a/web/app/components/datasets/create/step-two/unescape.ts b/web/app/components/datasets/create/step-two/unescape.ts new file mode 100644 index 0000000000..5c0f9e426a --- /dev/null +++ b/web/app/components/datasets/create/step-two/unescape.ts @@ -0,0 +1,54 @@ +// https://github.com/iamakulov/unescape-js/blob/master/src/index.js + +/** + * \\ - matches the backslash which indicates the beginning of an escape sequence + * ( + * u\{([0-9A-Fa-f]+)\} - first alternative; matches the variable-length hexadecimal escape sequence (\u{ABCD0}) + * | + * u([0-9A-Fa-f]{4}) - second alternative; matches the 4-digit hexadecimal escape sequence (\uABCD) + * | + * x([0-9A-Fa-f]{2}) - third alternative; matches the 2-digit hexadecimal escape sequence (\xA5) + * | + * ([1-7][0-7]{0,2}|[0-7]{2,3}) - fourth alternative; matches the up-to-3-digit octal escape sequence (\5 or \512) + * | + * (['"tbrnfv0\\]) - fifth alternative; matches the special escape characters (\t, \n and so on) + * | + * \U([0-9A-Fa-f]+) - sixth alternative; matches the 8-digit hexadecimal escape sequence used by python (\U0001F3B5) + * ) + */ +const jsEscapeRegex = /\\(u\{([0-9A-Fa-f]+)\}|u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([1-7][0-7]{0,2}|[0-7]{2,3})|(['"tbrnfv0\\]))|\\U([0-9A-Fa-f]{8})/g + +const usualEscapeSequences: Record = { + '0': '\0', + 'b': '\b', + 'f': '\f', + 'n': '\n', + 'r': '\r', + 't': '\t', + 'v': '\v', + '\'': '\'', + '"': '"', + '\\': '\\', +} + +const fromHex = (str: string) => String.fromCodePoint(parseInt(str, 16)) +const fromOct = (str: string) => String.fromCodePoint(parseInt(str, 8)) + +const unescape = (str: string) => { + return str.replace(jsEscapeRegex, (_, __, varHex, longHex, shortHex, octal, specialCharacter, python) => { + if (varHex !== undefined) + return fromHex(varHex) + else if (longHex !== undefined) + return fromHex(longHex) + else if (shortHex !== undefined) + return fromHex(shortHex) + else if (octal !== undefined) + return fromOct(octal) + else if (python !== undefined) + return fromHex(python) + else + return usualEscapeSequences[specialCharacter] + }) +} + +export default unescape diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts index 40463593f9..32f9d596ca 100644 --- a/web/i18n/en-US/dataset-creation.ts +++ b/web/i18n/en-US/dataset-creation.ts @@ -87,7 +87,8 @@ const translation = { custom: 'Custom', customDescription: 'Customize chunks rules, chunks length, and preprocessing rules, etc.', separator: 'Delimiter', - separatorPlaceholder: 'For example, newline (\\\\n) or special separator (such as "***")', + separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).', + separatorPlaceholder: '\\n\\n for separating paragraphs; \\n for separating lines', maxLength: 'Maximum chunk length', overlap: 'Chunk overlap', overlapTip: 'Setting the chunk overlap can maintain the semantic relevance between them, enhancing the retrieve effect. 
It is recommended to set 10%-25% of the maximum chunk size.', diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts index 47a15921f7..78f5170791 100644 --- a/web/i18n/zh-Hans/dataset-creation.ts +++ b/web/i18n/zh-Hans/dataset-creation.ts @@ -87,7 +87,8 @@ const translation = { custom: '自定义', customDescription: '自定义分段规则、分段长度以及预处理规则等参数', separator: '分段标识符', - separatorPlaceholder: '例如换行符(\n)或特定的分隔符(如 "***")', + separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\\n\\n,\\n),当段落超过最大块长度时,会按行进行分割。你也可以使用自定义的特殊分隔符(例如 ***)。', + separatorPlaceholder: '\\n\\n 用于分段;\\n 用于分行', maxLength: '分段最大长度', overlap: '分段重叠长度', overlapTip: '设置分段之间的重叠长度可以保留分段之间的语义关系,提升召回效果。建议设置为最大分段长度的10%-25%', From a03919c3b39075de171d6986bc03ca762758b9ac Mon Sep 17 00:00:00 2001 From: MuYu Date: Thu, 19 Sep 2024 18:08:01 +0800 Subject: [PATCH 07/40] feat: add hunyuan-vision (#8529) --- .../hunyuan/llm/_position.yaml | 1 + .../hunyuan/llm/hunyuan-vision.yaml | 39 +++++++++++++++++++ .../model_providers/hunyuan/llm/llm.py | 23 +++++++++++ 3 files changed, 63 insertions(+) create mode 100644 api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml index ca8600a534..f494984443 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/hunyuan/llm/_position.yaml @@ -3,3 +3,4 @@ - hunyuan-standard-256k - hunyuan-pro - hunyuan-turbo +- hunyuan-vision diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml new file mode 100644 index 0000000000..9edc7f4710 --- /dev/null +++ b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-vision.yaml @@ -0,0 +1,39 @@ +model: hunyuan-vision +label: + zh_Hans: hunyuan-vision + en_US: hunyuan-vision +model_type: llm +features: + - agent-thought + - tool-call + - multi-tool-call + - stream-tool-call + - vision +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8000 + - name: enable_enhance + label: + zh_Hans: 功能增强 + en_US: Enable Enhancement + type: boolean + help: + zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。 + en_US: Allow the model to perform external search to enhance the generation results. 
+ required: false + default: true +pricing: + input: '0.018' + output: '0.018' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py index b57e5e1c2b..2014de8516 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py @@ -1,6 +1,7 @@ import json import logging from collections.abc import Generator +from typing import cast from tencentcloud.common import credential from tencentcloud.common.exception import TencentCloudSDKException @@ -11,9 +12,12 @@ from tencentcloud.hunyuan.v20230901 import hunyuan_client, models from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, + ImagePromptMessageContent, PromptMessage, + PromptMessageContentType, PromptMessageTool, SystemPromptMessage, + TextPromptMessageContent, ToolPromptMessage, UserPromptMessage, ) @@ -143,6 +147,25 @@ class HunyuanLargeLanguageModel(LargeLanguageModel): tool_execute_result = {"result": message.content} content = json.dumps(tool_execute_result, ensure_ascii=False) dict_list.append({"Role": message.role.value, "Content": content, "ToolCallId": message.tool_call_id}) + elif isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + dict_list.append({"Role": message.role.value, "Content": message.content}) + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + sub_message_dict = {"Type": "text", "Text": message_content.data} + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast(ImagePromptMessageContent, message_content) + sub_message_dict = { + "Type": "image_url", + "ImageUrl": {"Url": message_content.data}, + } + sub_messages.append(sub_message_dict) + dict_list.append({"Role": message.role.value, "Contents": sub_messages}) else: dict_list.append({"Role": message.role.value, "Content": message.content}) return dict_list From 1568c5cae96652bbeea89e2971a48e19818d10fd Mon Sep 17 00:00:00 2001 From: Su Yang Date: Fri, 20 Sep 2024 15:29:33 +0800 Subject: [PATCH 08/40] fix: fix qwen series model type (#8580) --- .../model_runtime/model_providers/tongyi/llm/qwen-plus.yaml | 2 +- .../model_providers/tongyi/llm/qwen-turbo-0206.yaml | 2 +- .../model_providers/tongyi/llm/qwen-turbo-0624.yaml | 2 +- .../model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml | 2 +- .../model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml | 2 +- .../model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git 
a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index e78b77c7f2..d7fb13d7c7 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 131072 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml index 2c9857cf9f..ee6a090175 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 8192 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml index 7ea5afc795..9e46443f9c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 8192 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index d4c03100ec..7b2ca77b6b 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 8192 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml index 8b204ff1f0..cfe4b5a666 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-1.5b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 4096 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml index 3875a274e7..e541c197b0 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-72b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 4096 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml index 0920806845..ba4514e3d6 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2-math-7b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + 
mode: chat context_size: 4096 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml index 824954323b..e5596041af 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-0.5b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 32768 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml index c0a4b45be6..4004c59417 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-1.5b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 32768 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml index 92b67804e8..d8f53666ce 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-14b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 131072 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml index 960438e3e7..890f7e6e4e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-32b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 131072 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml index 59a8827d9e..6d3d2dd5bb 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-3b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 32768 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml index f14ee2daff..17d0eb5b35 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-72b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 131072 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml index 8ea8166358..435b3f90a2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml +++ 
b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-7b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 131072 parameter_rules: - name: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml index 8ea8166358..435b3f90a2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen2.5-coder-7b-instruct.yaml @@ -5,7 +5,7 @@ model_type: llm features: - agent-thought model_properties: - mode: completion + mode: chat context_size: 131072 parameter_rules: - name: temperature From c87f710d5836654614ea76409e96d094f44092d9 Mon Sep 17 00:00:00 2001 From: Su Yang Date: Fri, 20 Sep 2024 17:05:57 +0800 Subject: [PATCH 09/40] Fix: update qwen model and model config (#8584) Co-authored-by: -LAN- --- .../model_providers/tongyi/llm/_position.yaml | 51 ++++++++++++ .../tongyi/llm/qwen-coder-turbo-0919.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-coder-turbo-latest.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-coder-turbo.yaml | 79 ++++++++++++++++++ .../model_providers/tongyi/llm/qwen-long.yaml | 1 + .../tongyi/llm/qwen-math-plus-0816.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-math-plus-0919.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-math-plus-latest.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-math-plus.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-math-turbo-0919.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-math-turbo-latest.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-math-turbo.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-max-0107.yaml | 2 +- .../tongyi/llm/qwen-max-0403.yaml | 2 +- .../tongyi/llm/qwen-max-0428.yaml | 2 +- .../tongyi/llm/qwen-max-0919.yaml | 81 +++++++++++++++++++ .../tongyi/llm/qwen-max-latest.yaml | 81 +++++++++++++++++++ .../tongyi/llm/qwen-max-longcontext.yaml | 6 +- .../model_providers/tongyi/llm/qwen-max.yaml | 6 +- .../tongyi/llm/qwen-plus-0206.yaml | 6 +- .../tongyi/llm/qwen-plus-0624.yaml | 6 +- .../tongyi/llm/qwen-plus-0723.yaml | 10 +-- .../tongyi/llm/qwen-plus-0806.yaml | 4 +- .../tongyi/llm/qwen-plus-0919.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-plus-chat.yaml | 1 + .../tongyi/llm/qwen-plus-latest.yaml | 79 ++++++++++++++++++ .../model_providers/tongyi/llm/qwen-plus.yaml | 10 ++- .../tongyi/llm/qwen-turbo-0206.yaml | 6 +- .../tongyi/llm/qwen-turbo-0624.yaml | 6 +- .../tongyi/llm/qwen-turbo-0919.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-turbo-chat.yaml | 1 + .../tongyi/llm/qwen-turbo-latest.yaml | 79 ++++++++++++++++++ .../tongyi/llm/qwen-turbo.yaml | 12 +-- .../tongyi/llm/qwen-vl-max-0201.yaml | 1 + .../tongyi/llm/qwen-vl-max-0809.yaml | 12 ++- .../tongyi/llm/qwen-vl-max.yaml | 12 ++- .../tongyi/llm/qwen-vl-plus-0201.yaml | 57 +++++++++++++ .../tongyi/llm/qwen-vl-plus-0809.yaml | 10 +++ .../tongyi/llm/qwen-vl-plus.yaml | 12 ++- 39 files changed, 1464 insertions(+), 40 deletions(-) create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/_position.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml create mode 100644 
api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml create mode 100644 api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml diff --git a/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml new file mode 100644 index 0000000000..8ce336d60c --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/_position.yaml @@ -0,0 +1,51 @@ +- qwen-vl-max-0809 +- qwen-vl-max-0201 +- qwen-vl-max +- qwen-max-latest +- qwen-max-1201 +- qwen-max-0919 +- qwen-max-0428 +- qwen-max-0403 +- qwen-max-0107 +- qwen-max +- qwen-max-longcontext +- qwen-plus-latest +- qwen-plus-0919 +- qwen-plus-0806 +- qwen-plus-0723 +- qwen-plus-0624 +- qwen-plus-0206 +- qwen-plus-chat +- qwen-plus +- qwen-vl-plus-0809 +- qwen-vl-plus-0201 +- qwen-vl-plus +- qwen-turbo-latest +- qwen-turbo-0919 +- qwen-turbo-0624 +- qwen-turbo-0206 +- qwen-turbo-chat +- qwen-turbo +- qwen2.5-72b-instruct +- qwen2.5-32b-instruct +- qwen2.5-14b-instruct +- qwen2.5-7b-instruct +- qwen2.5-3b-instruct +- qwen2.5-1.5b-instruct +- qwen2.5-0.5b-instruct +- qwen2.5-coder-7b-instruct +- qwen2-math-72b-instruct +- qwen2-math-7b-instruct +- qwen2-math-1.5b-instruct +- qwen-long +- qwen-math-plus-latest +- qwen-math-plus-0919 +- qwen-math-plus-0816 +- qwen-math-plus +- qwen-math-turbo-latest +- qwen-math-turbo-0919 +- qwen-math-turbo +- qwen-coder-turbo-latest +- qwen-coder-turbo-0919 +- qwen-coder-turbo +- farui-plus diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml new file mode 100644 index 0000000000..ebba565d57 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-coder-turbo-0919 +label: + en_US: qwen-coder-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. 
Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml new file mode 100644 index 0000000000..361e2c2373 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-coder-turbo-latest +label: + en_US: qwen-coder-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml new file mode 100644 index 0000000000..f4032a4dd3 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-coder-turbo.yaml @@ -0,0 +1,79 @@ +model: qwen-coder-turbo +label: + en_US: qwen-coder-turbo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml index 33b3435eb6..dbe7d024a5 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-long.yaml @@ -1,3 +1,4 @@ +# model docs: https://help.aliyun.com/zh/model-studio/getting-started/models#27b2b3a15d5c6 model: qwen-long label: en_US: qwen-long diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml new file mode 100644 index 0000000000..89d1302abe --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0816.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus-0816 +label: + en_US: qwen-math-plus-0816 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml new file mode 100644 index 0000000000..032b3c970d --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus-0919 +label: + en_US: qwen-math-plus-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml new file mode 100644 index 0000000000..31dd9f6972 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus-latest +label: + en_US: qwen-math-plus-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml new file mode 100644 index 0000000000..1a51d57f78 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-plus.yaml @@ -0,0 +1,79 @@ +model: qwen-math-plus +label: + en_US: qwen-math-plus +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.004' + output: '0.012' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml new file mode 100644 index 0000000000..1894eea417 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-math-turbo-0919 +label: + en_US: qwen-math-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml new file mode 100644 index 0000000000..b8365618b0 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-math-turbo-latest +label: + en_US: qwen-math-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml new file mode 100644 index 0000000000..8d346d691e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-math-turbo.yaml @@ -0,0 +1,79 @@ +model: qwen-math-turbo +label: + en_US: qwen-math-turbo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 3072 + min: 1 + max: 3072 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). 
The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.002' + output: '0.006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml index 7c90afecf5..c0ad12b85e 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0107.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml index 935a16ebcb..b00fb44d29 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0403.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml index c39799a71f..1848dcc07d 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0428.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml new file mode 100644 index 0000000000..238882bb12 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-0919.yaml @@ -0,0 +1,81 @@ +model: qwen-max-0919 +label: + en_US: qwen-max-0919 +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
+ - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml new file mode 100644 index 0000000000..9d7d3c2fcb --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-latest.yaml @@ -0,0 +1,81 @@ +model: qwen-max-latest +label: + en_US: qwen-max-latest +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. 
When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.06' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml index 1c705670ca..a7bdc42f73 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max-longcontext.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 32768 + context_size: 32000 parameter_rules: - name: temperature use_template: temperature @@ -22,9 +22,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8000 min: 1 - max: 2000 + max: 8000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml index 64094effbb..57888406af 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-max.yaml @@ -8,7 +8,7 @@ features: - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature @@ -75,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.04' - output: '0.12' + input: '0.02' + output: '0.06' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml index 7940be9e8b..1e0b816617 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0206.yaml @@ -6,7 +6,7 @@ features: - agent-thought model_properties: mode: completion - context_size: 32768 + context_size: 32000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +20,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8000 min: 1 - max: 2000 + max: 8000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml index 0e02526beb..f70c373922 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0624.yaml @@ -6,7 +6,7 @@ features: - agent-thought model_properties: mode: completion - context_size: 32768 + context_size: 32000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +20,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8000 min: 1 - max: 2000 + max: 8000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml index 65175f1b10..c6007e9164 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0723.yaml @@ -1,12 +1,12 @@ -model: qwen-plus-0806 +model: qwen-plus-0723 label: - en_US: qwen-plus-0806 + en_US: qwen-plus-0723 model_type: llm features: - agent-thought model_properties: mode: completion - context_size: 32768 + context_size: 32000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +20,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8000 min: 1 - max: 2000 + max: 8000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. 
It defines the upper limit of generation, but does not guarantee that this number will be generated every time. diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml index 1c530dcba2..2f53c43336 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0806.yaml @@ -20,9 +20,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8192 min: 1 - max: 2000 + max: 8192 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml new file mode 100644 index 0000000000..90b54ca52e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-0919 +label: + en_US: qwen-plus-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: completion + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. 
For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml index bc848072ed..59e8851240 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-chat.yaml @@ -79,3 +79,4 @@ pricing: output: '0.012' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml new file mode 100644 index 0000000000..2a821dbcfe --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-plus-latest +label: + en_US: qwen-plus-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. 
+ - name: response_format + use_template: response_format +pricing: + input: '0.0008' + output: '0.002' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml index d7fb13d7c7..626884f4b2 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-plus.yaml @@ -3,7 +3,9 @@ label: en_US: qwen-plus model_type: llm features: + - multi-tool-call - agent-thought + - stream-tool-call model_properties: mode: chat context_size: 131072 @@ -20,9 +22,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 2000 + default: 8192 min: 1 - max: 2000 + max: 8192 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. @@ -73,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.004' - output: '0.012' + input: '0.0008' + output: '0.002' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml index ee6a090175..844fced77a 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0206.yaml @@ -6,7 +6,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +20,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 1500 + default: 2000 min: 1 - max: 1500 + max: 2000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml index 9e46443f9c..0152f75579 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0624.yaml @@ -6,7 +6,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +20,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 1500 + default: 2000 min: 1 - max: 1500 + max: 2000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml new file mode 100644 index 0000000000..19c6c8d293 --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-0919.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-0919 +label: + en_US: qwen-turbo-0919 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0003' + output: '0.0006' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml index f1950577ec..f557f311ef 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-chat.yaml @@ -79,3 +79,4 @@ pricing: output: '0.006' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml new file mode 100644 index 0000000000..be2475847e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo-latest.yaml @@ -0,0 +1,79 @@ +model: qwen-turbo-latest +label: + en_US: qwen-turbo-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 8192 + min: 1 + max: 8192 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. 
For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. + - name: enable_search + type: boolean + default: false + help: + zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。 + en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic. + - name: response_format + use_template: response_format +pricing: + input: '0.0006' + output: '0.0003' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml index 7b2ca77b6b..90f13dc19f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-turbo.yaml @@ -3,10 +3,12 @@ label: en_US: qwen-turbo model_type: llm features: + - multi-tool-call - agent-thought + - stream-tool-call model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: temperature use_template: temperature @@ -20,9 +22,9 @@ parameter_rules: - name: max_tokens use_template: max_tokens type: int - default: 1500 + default: 2000 min: 1 - max: 1500 + max: 2000 help: zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
@@ -73,7 +75,7 @@ parameter_rules: - name: response_format use_template: response_format pricing: - input: '0.002' - output: '0.006' + input: '0.0006' + output: '0.0003' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml index fffd732ca5..63b6074d0d 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0201.yaml @@ -45,3 +45,4 @@ pricing: output: '0.02' unit: '0.001' currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml index af8742b981..41d45966e9 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max-0809.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 32768 + context_size: 32000 parameter_rules: - name: top_p use_template: top_p @@ -28,6 +28,16 @@ parameter_rules: help: zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: seed required: false type: int diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml index a93d456428..78d0509374 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-max.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 32768 + context_size: 32000 parameter_rules: - name: top_p use_template: top_p @@ -28,6 +28,16 @@ parameter_rules: help: zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
- name: seed required: false type: int diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml new file mode 100644 index 0000000000..8944388b1e --- /dev/null +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0201.yaml @@ -0,0 +1,57 @@ +model: qwen-vl-plus-0201 +label: + en_US: qwen-vl-plus-0201 +model_type: llm +features: + - vision + - agent-thought +model_properties: + mode: chat + context_size: 8000 +parameter_rules: + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: seed + required: false + type: int + default: 1234 + label: + zh_Hans: 随机种子 + en_US: Random seed + help: + zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。 + en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time. + - name: response_format + use_template: response_format +pricing: + input: '0.02' + output: '0.02' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml index 12573511b9..869e0ea71c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus-0809.yaml @@ -28,6 +28,16 @@ parameter_rules: help: zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. - name: seed required: false type: int diff --git a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml index 13468c44ee..da11bacc64 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml +++ b/api/core/model_runtime/model_providers/tongyi/llm/qwen-vl-plus.yaml @@ -7,7 +7,7 @@ features: - agent-thought model_properties: mode: chat - context_size: 8192 + context_size: 8000 parameter_rules: - name: top_p use_template: top_p @@ -28,6 +28,16 @@ parameter_rules: help: zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: max_tokens + required: false + use_template: max_tokens + type: int + default: 2000 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. 
- name: seed required: false type: int From 1583283635612affb76d8a6cc5002be9d7e7c484 Mon Sep 17 00:00:00 2001 From: Qun <51054082+QunBB@users.noreply.github.com> Date: Fri, 20 Sep 2024 17:42:47 +0800 Subject: [PATCH 10/40] ComfyUI tool use the new internal enumeration class "VariableKey" (#8533) --- .../provider/builtin/comfyui/tools/comfyui_stable_diffusion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py index 81fc8cc985..eaa4b0d027 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_stable_diffusion.py @@ -333,7 +333,7 @@ class ComfyuiStableDiffusionTool(BuiltinTool): break return self.create_blob_message( - blob=image, meta={"mime_type": "image/png"}, save_as=self.VARIABLE_KEY.IMAGE.value + blob=image, meta={"mime_type": "image/png"}, save_as=self.VariableKey.IMAGE.value ) except Exception as e: From b773ebdab1a9c5b71ed7d26e2e40ed434a20e086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?= Date: Fri, 20 Sep 2024 18:09:35 +0800 Subject: [PATCH 11/40] chore: fix webpack dependencies order (#8542) --- web/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/package.json b/web/package.json index bc532fb242..3289efdb1b 100644 --- a/web/package.json +++ b/web/package.json @@ -37,6 +37,7 @@ "@remixicon/react": "^4.2.0", "@sentry/react": "^7.54.0", "@sentry/utils": "^7.54.0", + "@svgdotjs/svg.js": "^3.2.4", "@tailwindcss/line-clamp": "^0.4.4", "@tailwindcss/typography": "^0.5.9", "ahooks": "^3.7.5", @@ -44,7 +45,6 @@ "classnames": "^2.3.2", "copy-to-clipboard": "^3.3.3", "crypto-js": "^4.2.0", - "@svgdotjs/svg.js": "^3.2.4", "dayjs": "^1.11.7", "echarts": "^5.4.1", "echarts-for-react": "^3.0.2", From 7f3282ec04d87cfb8fcff892e824c96094b92636 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Fri, 20 Sep 2024 18:24:03 +0800 Subject: [PATCH 12/40] Update version to 0.8.3 in packaging and docker-compose files (#8590) --- api/configs/packaging/__init__.py | 2 +- docker-legacy/docker-compose.yaml | 6 +++--- docker/docker-compose.yaml | 6 +++--- web/package.json | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index 3815a6fca2..c752660122 100644 --- a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings): CURRENT_VERSION: str = Field( description="Dify version", - default="0.8.2", + default="0.8.3", ) COMMIT_SHA: str = Field( diff --git a/docker-legacy/docker-compose.yaml b/docker-legacy/docker-compose.yaml index f8c5700cd9..513915a1f1 100644 --- a/docker-legacy/docker-compose.yaml +++ b/docker-legacy/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3' services: # API service api: - image: langgenius/dify-api:0.8.2 + image: langgenius/dify-api:0.8.3 restart: always environment: # Startup mode, 'api' starts the API server. @@ -227,7 +227,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.8.2 + image: langgenius/dify-api:0.8.3 restart: always environment: CONSOLE_WEB_URL: '' @@ -396,7 +396,7 @@ services: # Frontend web application. 
web: - image: langgenius/dify-web:0.8.2 + image: langgenius/dify-web:0.8.3 restart: always environment: # The base URL of console application api server, refers to the Console base URL of WEB service if console domain is diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 0fbc695177..e72c3724f9 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -208,7 +208,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:0.8.2 + image: langgenius/dify-api:0.8.3 restart: always environment: # Use the shared environment variables. @@ -228,7 +228,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.8.2 + image: langgenius/dify-api:0.8.3 restart: always environment: # Use the shared environment variables. @@ -247,7 +247,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.8.2 + image: langgenius/dify-web:0.8.3 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} diff --git a/web/package.json b/web/package.json index 3289efdb1b..b775d87184 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "dify-web", - "version": "0.8.2", + "version": "0.8.3", "private": true, "engines": { "node": ">=18.17.0" From e0a330756368f66a8b7986f6d4ea7fcbbf76b145 Mon Sep 17 00:00:00 2001 From: takatost Date: Fri, 20 Sep 2024 19:47:25 +0800 Subject: [PATCH 13/40] fix(workflow): "Max submit count reached" error occurred when executing workflow as tool in iteration (#8595) --- api/core/workflow/graph_engine/graph_engine.py | 10 +++++++--- api/core/workflow/nodes/iteration/iteration_node.py | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index 57e4f716fd..8342dbd13d 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -180,16 +180,20 @@ class GraphEngine: # trigger graph run success event yield GraphRunSucceededEvent(outputs=self.graph_runtime_state.outputs) + self._release_thread() except GraphRunFailedError as e: yield GraphRunFailedEvent(error=e.error) + self._release_thread() return except Exception as e: logger.exception("Unknown Error when graph running") yield GraphRunFailedEvent(error=str(e)) + self._release_thread() raise e - finally: - if self.is_main_thread_pool and self.thread_pool_id in GraphEngine.workflow_thread_pool_mapping: - del GraphEngine.workflow_thread_pool_mapping[self.thread_pool_id] + + def _release_thread(self): + if self.is_main_thread_pool and self.thread_pool_id in GraphEngine.workflow_thread_pool_mapping: + del GraphEngine.workflow_thread_pool_mapping[self.thread_pool_id] def _run( self, diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index 6f20745daf..01bb4e9076 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -89,6 +89,7 @@ class IterationNode(BaseNode): variable_pool=variable_pool, max_execution_steps=dify_config.WORKFLOW_MAX_EXECUTION_STEPS, max_execution_time=dify_config.WORKFLOW_MAX_EXECUTION_TIME, + thread_pool_id=self.thread_pool_id, ) start_at = datetime.now(timezone.utc).replace(tzinfo=None) From d63a5a1c3ce526df43984899b6c2234a4d2aaf3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?= Date: Sat, 21 Sep 2024 17:30:30 +0800 
Subject: [PATCH 14/40] fix: a helper link error (#8508) --- .../app/configuration/dataset-config/settings-modal/index.tsx | 2 +- web/app/components/datasets/create/step-two/index.tsx | 2 +- .../components/datasets/hit-testing/modify-retrieval-modal.tsx | 2 +- web/app/components/datasets/settings/form/index.tsx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx index 65858ce8cf..e538c347d9 100644 --- a/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx +++ b/web/app/components/app/configuration/dataset-config/settings-modal/index.tsx @@ -263,7 +263,7 @@ const SettingsModal: FC = ({
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 94614918db..f4fc58ee2a 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -820,7 +820,7 @@ const StepTwo = ({
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx b/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx index 999f1cdf0d..1fc5b68d67 100644 --- a/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx +++ b/web/app/components/datasets/hit-testing/modify-retrieval-modal.tsx @@ -77,7 +77,7 @@ const ModifyRetrievalModal: FC = ({
{t('datasetSettings.form.retrievalSetting.title')}
diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index 0f6bdd0a59..15b8abc242 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -245,7 +245,7 @@ const Form = () => {
{t('datasetSettings.form.retrievalSetting.title')}
From 483ead55d5d0e337f2b82a8dab2cdf501f390caa Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sat, 21 Sep 2024 17:30:43 +0800 Subject: [PATCH 15/40] chore: translate i18n files (#8557) Co-authored-by: iamjoel <2120155+iamjoel@users.noreply.github.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> --- web/i18n/de-DE/dataset-creation.ts | 1 + web/i18n/es-ES/dataset-creation.ts | 1 + web/i18n/fa-IR/dataset-creation.ts | 1 + web/i18n/fr-FR/dataset-creation.ts | 1 + web/i18n/hi-IN/dataset-creation.ts | 1 + web/i18n/it-IT/dataset-creation.ts | 1 + web/i18n/ja-JP/dataset-creation.ts | 1 + web/i18n/ko-KR/dataset-creation.ts | 1 + web/i18n/pl-PL/dataset-creation.ts | 1 + web/i18n/pt-BR/dataset-creation.ts | 1 + web/i18n/ro-RO/dataset-creation.ts | 1 + web/i18n/ru-RU/dataset-creation.ts | 1 + web/i18n/tr-TR/dataset-creation.ts | 1 + web/i18n/uk-UA/dataset-creation.ts | 1 + web/i18n/vi-VN/dataset-creation.ts | 1 + web/i18n/zh-Hant/dataset-creation.ts | 1 + 16 files changed, 16 insertions(+) diff --git a/web/i18n/de-DE/dataset-creation.ts b/web/i18n/de-DE/dataset-creation.ts index 8b27395049..5251cc94e8 100644 --- a/web/i18n/de-DE/dataset-creation.ts +++ b/web/i18n/de-DE/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: 'Wissenseinstellungen.', websiteSource: 'Preprocess-Website', webpageUnit: 'Seiten', + separatorTip: 'Ein Trennzeichen ist das Zeichen, das zum Trennen von Text verwendet wird. \\n\\n und \\n sind häufig verwendete Trennzeichen zum Trennen von Absätzen und Zeilen. In Kombination mit Kommas (\\n\\n,\\n) werden Absätze nach Zeilen segmentiert, wenn die maximale Blocklänge überschritten wird. Sie können auch spezielle, von Ihnen selbst definierte Trennzeichen verwenden (z. B. ***).', }, stepThree: { creationTitle: '🎉 Wissen erstellt', diff --git a/web/i18n/es-ES/dataset-creation.ts b/web/i18n/es-ES/dataset-creation.ts index 132c9cbb9b..e093632bc4 100644 --- a/web/i18n/es-ES/dataset-creation.ts +++ b/web/i18n/es-ES/dataset-creation.ts @@ -138,6 +138,7 @@ const translation = { indexSettingTip: 'Para cambiar el método de índice, por favor ve a la ', retrievalSettingTip: 'Para cambiar el método de índice, por favor ve a la ', datasetSettingLink: 'configuración del conocimiento.', + separatorTip: 'Un delimitador es el carácter que se utiliza para separar el texto. \\n\\n y \\n son delimitadores comúnmente utilizados para separar párrafos y líneas. Combinado con comas (\\n\\n,\\n), los párrafos se segmentarán por líneas cuando excedan la longitud máxima del fragmento. También puede utilizar delimitadores especiales definidos por usted mismo (por ejemplo, ***).', }, stepThree: { creationTitle: '🎉 Conocimiento creado', diff --git a/web/i18n/fa-IR/dataset-creation.ts b/web/i18n/fa-IR/dataset-creation.ts index e6e6ad5bfb..40dae4b02d 100644 --- a/web/i18n/fa-IR/dataset-creation.ts +++ b/web/i18n/fa-IR/dataset-creation.ts @@ -138,6 +138,7 @@ const translation = { indexSettingTip: 'برای تغییر روش شاخص، لطفاً به', retrievalSettingTip: 'برای تغییر روش شاخص، لطفاً به', datasetSettingLink: 'تنظیمات دانش بروید.', + separatorTip: 'جداکننده نویسه ای است که برای جداسازی متن استفاده می شود. \\n\\n و \\n معمولا برای جداسازی پاراگراف ها و خطوط استفاده می شوند. همراه با کاما (\\n\\n,\\n)، پاراگراف ها زمانی که از حداکثر طول تکه فراتر می روند، با خطوط تقسیم بندی می شوند. 
همچنین می توانید از جداکننده های خاصی که توسط خودتان تعریف شده اند استفاده کنید (مثلا ***).', }, stepThree: { creationTitle: ' دانش ایجاد شد', diff --git a/web/i18n/fr-FR/dataset-creation.ts b/web/i18n/fr-FR/dataset-creation.ts index c08a3e5731..38066885d0 100644 --- a/web/i18n/fr-FR/dataset-creation.ts +++ b/web/i18n/fr-FR/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: 'Paramètres de connaissance.', webpageUnit: 'Pages', websiteSource: 'Site web de prétraitement', + separatorTip: 'Un délimiteur est le caractère utilisé pour séparer le texte. \\n\\n et \\n sont des délimiteurs couramment utilisés pour séparer les paragraphes et les lignes. Combiné à des virgules (\\n\\n,\\n), les paragraphes seront segmentés par des lignes lorsqu’ils dépasseront la longueur maximale des morceaux. Vous pouvez également utiliser des délimiteurs spéciaux définis par vous-même (par exemple ***).', }, stepThree: { creationTitle: '🎉 Connaissance créée', diff --git a/web/i18n/hi-IN/dataset-creation.ts b/web/i18n/hi-IN/dataset-creation.ts index 0fa71acf4a..19c396081a 100644 --- a/web/i18n/hi-IN/dataset-creation.ts +++ b/web/i18n/hi-IN/dataset-creation.ts @@ -155,6 +155,7 @@ const translation = { indexSettingTip: 'इंडेक्स विधि बदलने के लिए, कृपया जाएं ', retrievalSettingTip: 'इंडेक्स विधि बदलने के लिए, कृपया जाएं ', datasetSettingLink: 'ज्ञान सेटिंग्स।', + separatorTip: 'एक सीमांकक पाठ को अलग करने के लिए उपयोग किया जाने वाला वर्ण है। \\n\\n और \\n आमतौर पर पैराग्राफ और लाइनों को अलग करने के लिए उपयोग किए जाने वाले सीमांकक हैं। अल्पविराम (\\n\\n,\\n) के साथ संयुक्त, अधिकतम खंड लंबाई से अधिक होने पर अनुच्छेदों को पंक्तियों द्वारा खंडित किया जाएगा। आप स्वयं द्वारा परिभाषित विशेष सीमांकक का भी उपयोग कर सकते हैं (उदा. ***).', }, stepThree: { creationTitle: '🎉 ज्ञान बनाया गया', diff --git a/web/i18n/it-IT/dataset-creation.ts b/web/i18n/it-IT/dataset-creation.ts index 1629776bf3..46889b80a6 100644 --- a/web/i18n/it-IT/dataset-creation.ts +++ b/web/i18n/it-IT/dataset-creation.ts @@ -158,6 +158,7 @@ const translation = { indexSettingTip: 'Per cambiare il metodo di indicizzazione, vai alle ', retrievalSettingTip: 'Per cambiare il metodo di indicizzazione, vai alle ', datasetSettingLink: 'impostazioni della Conoscenza.', + separatorTip: 'Un delimitatore è il carattere utilizzato per separare il testo. \\n\\n e \\n sono delimitatori comunemente usati per separare paragrafi e righe. In combinazione con le virgole (\\n\\n,\\n), i paragrafi verranno segmentati per righe quando superano la lunghezza massima del blocco. È inoltre possibile utilizzare delimitatori speciali definiti dall\'utente (ad es. 
***).', }, stepThree: { creationTitle: '🎉 Conoscenza creata', diff --git a/web/i18n/ja-JP/dataset-creation.ts b/web/i18n/ja-JP/dataset-creation.ts index e6d204840a..d11a0c94e9 100644 --- a/web/i18n/ja-JP/dataset-creation.ts +++ b/web/i18n/ja-JP/dataset-creation.ts @@ -138,6 +138,7 @@ const translation = { indexSettingTip: 'インデックス方法を変更するには、', retrievalSettingTip: '検索方法を変更するには、', datasetSettingLink: 'ナレッジ設定', + separatorTip: '区切り文字は、テキストを区切るために使用される文字です。\\n\\n と \\n は、段落と行を区切るために一般的に使用される区切り記号です。カンマ (\\n\\n,\\n) と組み合わせると、最大チャンク長を超えると、段落は行で区切られます。自分で定義した特別な区切り文字を使用することもできます(例:***)。', }, stepThree: { creationTitle: '🎉 ナレッジが作成されました', diff --git a/web/i18n/ko-KR/dataset-creation.ts b/web/i18n/ko-KR/dataset-creation.ts index e8851acd2f..6a4126d8db 100644 --- a/web/i18n/ko-KR/dataset-creation.ts +++ b/web/i18n/ko-KR/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: '지식 설정', webpageUnit: '페이지', websiteSource: '웹 사이트 전처리', + separatorTip: '구분 기호는 텍스트를 구분하는 데 사용되는 문자입니다. \\n\\n 및 \\n은 단락과 줄을 구분하는 데 일반적으로 사용되는 구분 기호입니다. 쉼표(\\n\\n,\\n)와 함께 사용하면 최대 청크 길이를 초과할 경우 단락이 줄로 분할됩니다. 직접 정의한 특수 구분 기호(예: ***)를 사용할 수도 있습니다.', }, stepThree: { creationTitle: '🎉 지식이 생성되었습니다', diff --git a/web/i18n/pl-PL/dataset-creation.ts b/web/i18n/pl-PL/dataset-creation.ts index 64e50c6b33..f5b36e62ee 100644 --- a/web/i18n/pl-PL/dataset-creation.ts +++ b/web/i18n/pl-PL/dataset-creation.ts @@ -146,6 +146,7 @@ const translation = { datasetSettingLink: 'ustawień Wiedzy.', webpageUnit: 'Stron', websiteSource: 'Witryna internetowa przetwarzania wstępnego', + separatorTip: 'Ogranicznik to znak używany do oddzielania tekstu. \\n\\n i \\n są powszechnie używanymi ogranicznikami do oddzielania akapitów i wierszy. W połączeniu z przecinkami (\\n\\n,\\n), akapity będą segmentowane wierszami po przekroczeniu maksymalnej długości fragmentu. Możesz również skorzystać ze zdefiniowanych przez siebie specjalnych ograniczników (np. ***).', }, stepThree: { creationTitle: '🎉 Utworzono Wiedzę', diff --git a/web/i18n/pt-BR/dataset-creation.ts b/web/i18n/pt-BR/dataset-creation.ts index 4ab78a50c7..511f0d5bcb 100644 --- a/web/i18n/pt-BR/dataset-creation.ts +++ b/web/i18n/pt-BR/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: 'configurações do Conhecimento.', websiteSource: 'Site de pré-processamento', webpageUnit: 'Páginas', + separatorTip: 'Um delimitador é o caractere usado para separar o texto. \\n\\n e \\n são delimitadores comumente usados para separar parágrafos e linhas. Combinado com vírgulas (\\n\\n,\\n), os parágrafos serão segmentados por linhas ao exceder o comprimento máximo do bloco. Você também pode usar delimitadores especiais definidos por você (por exemplo, ***).', }, stepThree: { creationTitle: '🎉 Conhecimento criado', diff --git a/web/i18n/ro-RO/dataset-creation.ts b/web/i18n/ro-RO/dataset-creation.ts index efe3bb246c..4ea0b04758 100644 --- a/web/i18n/ro-RO/dataset-creation.ts +++ b/web/i18n/ro-RO/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: 'setările Cunoștinței.', webpageUnit: 'Pagini', websiteSource: 'Site-ul web de preprocesare', + separatorTip: 'Un delimitator este caracterul folosit pentru a separa textul. \\n\\n și \\n sunt delimitatori utilizați în mod obișnuit pentru separarea paragrafelor și liniilor. Combinate cu virgule (\\n\\n,\\n), paragrafele vor fi segmentate pe linii atunci când depășesc lungimea maximă a bucății. 
De asemenea, puteți utiliza delimitatori speciali definiți de dumneavoastră (de exemplu, ***).', }, stepThree: { creationTitle: '🎉 Cunoștință creată', diff --git a/web/i18n/ru-RU/dataset-creation.ts b/web/i18n/ru-RU/dataset-creation.ts index c4dce774d8..c97daeeece 100644 --- a/web/i18n/ru-RU/dataset-creation.ts +++ b/web/i18n/ru-RU/dataset-creation.ts @@ -138,6 +138,7 @@ const translation = { indexSettingTip: 'Чтобы изменить метод индексации, пожалуйста, перейдите в ', retrievalSettingTip: 'Чтобы изменить метод индексации, пожалуйста, перейдите в ', datasetSettingLink: 'настройки базы знаний.', + separatorTip: 'Разделитель — это символ, используемый для разделения текста. \\n\\n и \\n — это часто используемые разделители для разделения абзацев и строк. В сочетании с запятыми (\\n\\n,\\n) абзацы будут сегментированы по строкам, если максимальная длина блока превышает их. Вы также можете использовать специальные разделители, определенные вами (например, ***).', }, stepThree: { creationTitle: '🎉 База знаний создана', diff --git a/web/i18n/tr-TR/dataset-creation.ts b/web/i18n/tr-TR/dataset-creation.ts index b26608c39f..c29e3045b8 100644 --- a/web/i18n/tr-TR/dataset-creation.ts +++ b/web/i18n/tr-TR/dataset-creation.ts @@ -138,6 +138,7 @@ const translation = { indexSettingTip: 'Dizin yöntemini değiştirmek için, lütfen', retrievalSettingTip: 'Dizin yöntemini değiştirmek için, lütfen', datasetSettingLink: 'Bilgi ayarlarına gidin.', + separatorTip: 'Sınırlayıcı, metni ayırmak için kullanılan karakterdir. \\n\\n ve \\n, paragrafları ve satırları ayırmak için yaygın olarak kullanılan sınırlayıcılardır. Virgüllerle (\\n\\n,\\n) birleştirildiğinde, paragraflar maksimum öbek uzunluğunu aştığında satırlarla bölünür. Kendiniz tarafından tanımlanan özel sınırlayıcıları da kullanabilirsiniz (örn.', }, stepThree: { creationTitle: '🎉 Bilgi oluşturuldu', diff --git a/web/i18n/uk-UA/dataset-creation.ts b/web/i18n/uk-UA/dataset-creation.ts index e4a38f41f4..5b2c9503cf 100644 --- a/web/i18n/uk-UA/dataset-creation.ts +++ b/web/i18n/uk-UA/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: 'Налаштування знань.', webpageUnit: 'Сторінок', websiteSource: 'Веб-сайт попередньої обробки', + separatorTip: 'Роздільник – це символ, який використовується для поділу тексту. \\n\\n та \\n є часто використовуваними роздільниками для відокремлення абзаців та рядків. У поєднанні з комами (\\n\\n,\\n) абзаци будуть розділені лініями, якщо вони перевищують максимальну довжину фрагмента. Ви також можете використовувати спеціальні роздільники, визначені вами (наприклад, ***).', }, stepThree: { creationTitle: '🎉 Знання створено', diff --git a/web/i18n/vi-VN/dataset-creation.ts b/web/i18n/vi-VN/dataset-creation.ts index da69020287..af49575b90 100644 --- a/web/i18n/vi-VN/dataset-creation.ts +++ b/web/i18n/vi-VN/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: 'cài đặt Kiến thức.', websiteSource: 'Trang web tiền xử lý', webpageUnit: 'Trang', + separatorTip: 'Dấu phân cách là ký tự được sử dụng để phân tách văn bản. \\n\\n và \\n là dấu phân cách thường được sử dụng để tách các đoạn văn và dòng. Kết hợp với dấu phẩy (\\n\\n,\\n), các đoạn văn sẽ được phân đoạn theo các dòng khi vượt quá độ dài đoạn tối đa. 
Bạn cũng có thể sử dụng dấu phân cách đặc biệt do chính bạn xác định (ví dụ: ***).', }, stepThree: { creationTitle: '🎉 Kiến thức đã được tạo', diff --git a/web/i18n/zh-Hant/dataset-creation.ts b/web/i18n/zh-Hant/dataset-creation.ts index fd810d41c1..73a57db6a0 100644 --- a/web/i18n/zh-Hant/dataset-creation.ts +++ b/web/i18n/zh-Hant/dataset-creation.ts @@ -133,6 +133,7 @@ const translation = { datasetSettingLink: '知識庫設定。', websiteSource: '預處理網站', webpageUnit: '頁面', + separatorTip: '分隔符是用於分隔文字的字元。\\n\\n 和 \\n 是分隔段落和行的常用分隔符。與逗號 (\\n\\n,\\n) 組合使用時,當超過最大區塊長度時,段落將按行分段。您也可以使用自定義的特殊分隔符(例如 ***)。', }, stepThree: { creationTitle: '🎉 知識庫已建立', From e75c33a56166c0825faa1d07d955086a4358d125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=96=B9=E7=A8=8B?= <1787003204@qq.com> Date: Sat, 21 Sep 2024 17:30:58 +0800 Subject: [PATCH 16/40] Enhance Readme Documentation to Clarify the Importance of Celery Service (#8558) --- api/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/api/README.md b/api/README.md index 70ca2e86a8..bab33f9293 100644 --- a/api/README.md +++ b/api/README.md @@ -65,14 +65,12 @@ 8. Start Dify [web](../web) service. 9. Setup your application by visiting `http://localhost:3000`... -10. If you need to debug local async processing, please start the worker service. +10. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service. ```bash poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion ``` - The started celery app handles the async tasks, e.g. dataset importing and documents indexing. - ## Testing 1. Install dependencies for both the backend and the test environment From b3cb97f0adff4b670d95bc5146d15c12382e1ead Mon Sep 17 00:00:00 2001 From: WalterMitty Date: Sat, 21 Sep 2024 17:31:49 +0800 Subject: [PATCH 17/40] docs: Update ssrf_proxy related doc link in docker-compose file (#8516) --- docker-legacy/docker-compose.middleware.yaml | 2 +- docker-legacy/docker-compose.yaml | 2 +- docker/docker-compose.middleware.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker-legacy/docker-compose.middleware.yaml b/docker-legacy/docker-compose.middleware.yaml index fadbb3e608..da54fe33fd 100644 --- a/docker-legacy/docker-compose.middleware.yaml +++ b/docker-legacy/docker-compose.middleware.yaml @@ -73,7 +73,7 @@ services: # ssrf_proxy server # for more information, please refer to - # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed + # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed ssrf_proxy: image: ubuntu/squid:latest restart: always diff --git a/docker-legacy/docker-compose.yaml b/docker-legacy/docker-compose.yaml index 513915a1f1..1636bb6a21 100644 --- a/docker-legacy/docker-compose.yaml +++ b/docker-legacy/docker-compose.yaml @@ -500,7 +500,7 @@ services: # ssrf_proxy server # for more information, please refer to - # https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed + # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed ssrf_proxy: image: ubuntu/squid:latest restart: always diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml index 251c62fee1..d7900def73 100644 --- a/docker/docker-compose.middleware.yaml +++ b/docker/docker-compose.middleware.yaml @@ -63,7 +63,7 @@ services: # ssrf_proxy server # for more information, please refer to - # 
https://docs.dify.ai/learn-more/faq/self-host-faq#id-18.-why-is-ssrf_proxy-needed + # https://docs.dify.ai/learn-more/faq/install-faq#id-18.-why-is-ssrf_proxy-needed ssrf_proxy: image: ubuntu/squid:latest restart: always From 5541248264fdf69392917a25924786a1b834e91e Mon Sep 17 00:00:00 2001 From: Hongbin <61153998+BingGeX@users.noreply.github.com> Date: Sat, 21 Sep 2024 17:33:15 +0800 Subject: [PATCH 18/40] =?UTF-8?q?Update=20the=20PerfXCloud=20provider=20mo?= =?UTF-8?q?del=20list=EF=BC=8CUpdate=20PerfXCloudProvider=20validate=5Fpro?= =?UTF-8?q?vider=5Fcredentials=20method.=20(#8587)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: xhb <466010723@qq.com> --- .../perfxcloud/llm/Llama3-Chinese_v2.yaml | 1 + .../Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml | 1 + .../llm/Meta-Llama-3-8B-Instruct.yaml | 1 + ...Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml | 1 + .../llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml | 1 + .../perfxcloud/llm/Qwen1.5-7B.yaml | 1 + .../llm/Qwen2-72B-Instruct-AWQ-int4.yaml | 61 ++++++++++++++++++ .../llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml | 1 + .../perfxcloud/llm/Qwen2-7B-Instruct.yaml | 63 +++++++++++++++++++ .../perfxcloud/llm/Qwen2-7B.yaml | 1 + .../perfxcloud/llm/Qwen2.5-72B-Instruct.yaml | 61 ++++++++++++++++++ .../perfxcloud/llm/Qwen2.5-7B-Instruct.yaml | 61 ++++++++++++++++++ .../llm/Reflection-Llama-3.1-70B.yaml | 61 ++++++++++++++++++ .../perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml | 61 ++++++++++++++++++ .../perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml | 61 ++++++++++++++++++ .../perfxcloud/llm/Yi-Coder-9B-Chat.yaml | 61 ++++++++++++++++++ .../perfxcloud/llm/_position.yaml | 25 +++++--- .../perfxcloud/llm/chatglm3-6b.yaml | 1 + .../perfxcloud/llm/deepseek-v2-chat.yaml | 1 + .../perfxcloud/llm/deepseek-v2-lite-chat.yaml | 1 + .../model_providers/perfxcloud/perfxcloud.py | 20 +----- .../text_embedding/gte-Qwen2-7B-instruct.yaml | 4 ++ 22 files changed, 523 insertions(+), 27 deletions(-) create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml create mode 100644 api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml index 87712874b9..bf91468fcf 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml index 
f16f3de60b..781b837e8e 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml index 21267c240b..67210e9020 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml index 80c7ec40f2..482632ff06 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml index 841dd97f35..ddb6fd977c 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml index 33d5d12b22..024c79dbcf 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml new file mode 100644 index 0000000000..94f661f40d --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml @@ -0,0 +1,61 @@ +model: Qwen2-72B-Instruct-AWQ-int4 +label: + en_US: Qwen2-72B-Instruct-AWQ-int4 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. 
A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. 
+pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml index 62255cc7d2..a06f8d5ab1 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml @@ -61,3 +61,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml new file mode 100644 index 0000000000..4369411399 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml @@ -0,0 +1,63 @@ +model: Qwen2-7B-Instruct +label: + en_US: Qwen2-7B-Instruct +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: completion + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.3 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 2000 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml index 2f3f1f0225..d549ecd227 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml @@ -61,3 +61,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml new file mode 100644 index 0000000000..15cbf01f1f --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml @@ -0,0 +1,61 @@ +model: Qwen2.5-72B-Instruct +label: + en_US: Qwen2.5-72B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 30720 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. 
For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml new file mode 100644 index 0000000000..dadc8f8f32 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml @@ -0,0 +1,61 @@ +model: Qwen2.5-7B-Instruct +label: + en_US: Qwen2.5-7B-Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. 
The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. + - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml new file mode 100644 index 0000000000..649be20b48 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml @@ -0,0 +1,61 @@ +model: Reflection-Llama-3.1-70B +label: + en_US: Reflection-Llama-3.1-70B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 10240 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml new file mode 100644 index 0000000000..92eae6804f --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml @@ -0,0 +1,61 @@ +model: Yi-1_5-9B-Chat-16K +label: + en_US: Yi-1_5-9B-Chat-16K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 16384 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml new file mode 100644 index 0000000000..0e21ce148c --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml @@ -0,0 +1,61 @@ +model: Yi-Coder-1.5B-Chat +label: + en_US: Yi-Coder-1.5B-Chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 20480 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml new file mode 100644 index 0000000000..23b0841ce4 --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml @@ -0,0 +1,61 @@ +model: Yi-Coder-9B-Chat +label: + en_US: Yi-Coder-9B-Chat +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 20480 +parameter_rules: + - name: temperature + use_template: temperature + type: float + default: 0.5 + min: 0.0 + max: 2.0 + help: + zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。 + en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain. + - name: max_tokens + use_template: max_tokens + type: int + default: 600 + min: 1 + max: 1248 + help: + zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。 + en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time. + - name: top_p + use_template: top_p + type: float + default: 0.8 + min: 0.1 + max: 0.9 + help: + zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。 + en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated. + - name: top_k + type: int + min: 0 + max: 99 + label: + zh_Hans: 取样数量 + en_US: Top k + help: + zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。 + en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. 
+ - name: repetition_penalty + required: false + type: float + default: 1.1 + label: + en_US: Repetition penalty + help: + zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。 + en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment. +pricing: + input: "0.000" + output: "0.000" + unit: "0.000" + currency: RMB diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml index 2c9eac0e49..37bf400f1e 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml @@ -1,15 +1,24 @@ -- Meta-Llama-3.1-405B-Instruct-AWQ-INT4 -- Meta-Llama-3.1-8B-Instruct -- Meta-Llama-3-70B-Instruct-GPTQ-Int4 -- Meta-Llama-3-8B-Instruct -- Qwen2-72B-Instruct-GPTQ-Int4 +- Qwen2.5-72B-Instruct +- Qwen2.5-7B-Instruct +- Yi-Coder-1.5B-Chat +- Yi-Coder-9B-Chat +- Qwen2-72B-Instruct-AWQ-int4 +- Yi-1_5-9B-Chat-16K +- Qwen2-7B-Instruct +- Reflection-Llama-3.1-70B - Qwen2-72B-Instruct +- Meta-Llama-3.1-8B-Instruct + +- Meta-Llama-3.1-405B-Instruct-AWQ-INT4 +- Meta-Llama-3-70B-Instruct-GPTQ-Int4 +- chatglm3-6b +- Meta-Llama-3-8B-Instruct +- Llama3-Chinese_v2 +- deepseek-v2-lite-chat +- Qwen2-72B-Instruct-GPTQ-Int4 - Qwen2-7B - Qwen-14B-Chat-Int4 - Qwen1.5-72B-Chat-GPTQ-Int4 - Qwen1.5-7B - Qwen1.5-110B-Chat-GPTQ-Int4 - deepseek-v2-chat -- deepseek-v2-lite-chat -- Llama3-Chinese_v2 -- chatglm3-6b diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml index f9c26b7f90..75d80f784a 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml index 078922ef95..fa9a7b7175 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml index 4ff3af7b51..75a26d2505 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml +++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml @@ -59,3 +59,4 @@ pricing: output: "0.000" unit: "0.000" currency: RMB +deprecated: true diff --git a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py index 450d22fb75..9a4ead031d 100644 --- a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py +++ b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py @@ -1,7 +1,5 @@ import logging -from core.model_runtime.entities.model_entities import ModelType -from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.model_providers.__base.model_provider import ModelProvider logger = 
logging.getLogger(__name__) @@ -9,20 +7,4 @@ logger = logging.getLogger(__name__) class PerfXCloudProvider(ModelProvider): def validate_provider_credentials(self, credentials: dict) -> None: - """ - Validate provider credentials - if validate failed, raise exception - - :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. - """ - try: - model_instance = self.get_model_instance(ModelType.LLM) - - # Use `Qwen2_72B_Chat_GPTQ_Int4` model for validate, - # no matter what model you pass in, text completion model or chat model - model_instance.validate_credentials(model="Qwen2-72B-Instruct-GPTQ-Int4", credentials=credentials) - except CredentialsValidateFailedError as ex: - raise ex - except Exception as ex: - logger.exception(f"{self.get_provider_schema().provider} credentials validate failed") - raise ex + pass diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml new file mode 100644 index 0000000000..03db0d8bce --- /dev/null +++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml @@ -0,0 +1,4 @@ +model: gte-Qwen2-7B-instruct +model_type: text-embedding +model_properties: + context_size: 2048 From 5ddb601e4345a5099668618d5e0e7e6c3dd56c79 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Sat, 21 Sep 2024 18:08:07 +0800 Subject: [PATCH 19/40] add MixtralAI Model (#8517) --- .../mistralai/llm/_position.yaml | 5 ++ .../mistralai/llm/codestral-latest.yaml | 51 +++++++++++++++++++ .../mistralai/llm/mistral-embed.yaml | 51 +++++++++++++++++++ .../mistralai/llm/open-codestral-mamba.yaml | 51 +++++++++++++++++++ .../mistralai/llm/open-mistral-nemo.yaml | 51 +++++++++++++++++++ .../mistralai/llm/pixtral-12b-2409.yaml | 51 +++++++++++++++++++ 6 files changed, 260 insertions(+) create mode 100644 api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml create mode 100644 api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml create mode 100644 api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml create mode 100644 api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml create mode 100644 api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml diff --git a/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml b/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml index 751003d71e..bdb06b7fff 100644 --- a/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/mistralai/llm/_position.yaml @@ -1,3 +1,8 @@ +- pixtral-12b-2409 +- codestral-latest +- mistral-embed +- open-mistral-nemo +- open-codestral-mamba - open-mistral-7b - open-mixtral-8x7b - open-mixtral-8x22b diff --git a/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml b/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml new file mode 100644 index 0000000000..5f1260233f --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/codestral-latest.yaml @@ -0,0 +1,51 @@ +model: codestral-latest +label: + zh_Hans: codestral-latest + en_US: codestral-latest +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + 
max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 4096 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml b/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml new file mode 100644 index 0000000000..d759103d08 --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/mistral-embed.yaml @@ -0,0 +1,51 @@ +model: mistral-embed +label: + zh_Hans: mistral-embed + en_US: mistral-embed +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 1024 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml b/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml new file mode 100644 index 0000000000..d7ffb9ea02 --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/open-codestral-mamba.yaml @@ -0,0 +1,51 @@ +model: open-codestral-mamba +label: + zh_Hans: open-codestral-mamba + en_US: open-codestral-mamba +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 256000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 16384 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. 
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml b/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml new file mode 100644 index 0000000000..dcda4fbce7 --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/open-mistral-nemo.yaml @@ -0,0 +1,51 @@ +model: open-mistral-nemo +label: + zh_Hans: open-mistral-nemo + en_US: open-mistral-nemo +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. + zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD diff --git a/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml b/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml new file mode 100644 index 0000000000..0b002b49ca --- /dev/null +++ b/api/core/model_runtime/model_providers/mistralai/llm/pixtral-12b-2409.yaml @@ -0,0 +1,51 @@ +model: pixtral-12b-2409 +label: + zh_Hans: pixtral-12b-2409 + en_US: pixtral-12b-2409 +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + default: 0.7 + min: 0 + max: 1 + - name: top_p + use_template: top_p + default: 1 + min: 0 + max: 1 + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 + - name: safe_prompt + default: false + type: boolean + help: + en_US: Whether to inject a safety prompt before all conversations. + zh_Hans: 是否开启提示词审查 + label: + en_US: SafePrompt + zh_Hans: 提示词审查 + - name: random_seed + type: int + help: + en_US: The seed to use for random sampling. If set, different calls will generate deterministic results. 
+ zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定 + label: + en_US: RandomSeed + zh_Hans: 随机数种子 + default: 0 + min: 0 + max: 2147483647 +pricing: + input: '0.008' + output: '0.024' + unit: '0.001' + currency: USD From 8219f9e0909a1b7ef6b3f887133fc328f62bcff6 Mon Sep 17 00:00:00 2001 From: Nam Vu Date: Sat, 21 Sep 2024 19:49:01 +0700 Subject: [PATCH 20/40] fix: api/core/ops/ops_trace_manager.py (#8501) --- api/core/ops/ops_trace_manager.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 6f17bade97..876790f5b8 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -176,11 +176,18 @@ class OpsTraceManager: return None app: App = db.session.query(App).filter(App.id == app_id).first() + + if app is None: + return None + app_ops_trace_config = json.loads(app.tracing) if app.tracing else None - if app_ops_trace_config is not None: - tracing_provider = app_ops_trace_config.get("tracing_provider") - else: + if app_ops_trace_config is None: + return None + + tracing_provider = app_ops_trace_config.get("tracing_provider") + + if tracing_provider is None or tracing_provider not in provider_config_map: return None # decrypt_token From 1a8dcae10e4e0f7411cce3e48171dde1a79db951 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Sat, 21 Sep 2024 22:52:10 +0800 Subject: [PATCH 21/40] add Qwen custom add model interface (#8565) --- .../model_providers/tongyi/llm/llm.py | 71 ++++++++++++++++++- .../model_providers/tongyi/tongyi.yaml | 18 +++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py index 1d4eba6668..f90c7f075f 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py +++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py @@ -30,7 +30,15 @@ from core.model_runtime.entities.message_entities import ( ToolPromptMessage, UserPromptMessage, ) -from core.model_runtime.entities.model_entities import ModelFeature +from core.model_runtime.entities.model_entities import ( + AIModelEntity, + FetchFrom, + I18nObject, + ModelFeature, + ModelType, + ParameterRule, + ParameterType, +) from core.model_runtime.errors.invoke import ( InvokeAuthorizationError, InvokeBadRequestError, @@ -520,3 +528,64 @@ class TongyiLargeLanguageModel(LargeLanguageModel): UnsupportedHTTPMethod, ], } + + def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None: + """ + Architecture for defining customizable models + + :param model: model name + :param credentials: model credentials + :return: AIModelEntity or None + """ + rules = [ + ParameterRule( + name="temperature", + type=ParameterType.FLOAT, + use_template="temperature", + label=I18nObject(zh_Hans="温度", en_US="Temperature"), + ), + ParameterRule( + name="top_p", + type=ParameterType.FLOAT, + use_template="top_p", + label=I18nObject(zh_Hans="Top P", en_US="Top P"), + ), + ParameterRule( + name="top_k", + type=ParameterType.INT, + min=0, + max=99, + label=I18nObject(zh_Hans="top_k", en_US="top_k"), + ), + ParameterRule( + name="max_tokens", + type=ParameterType.INT, + min=1, + max=128000, + default=1024, + label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"), + ), + ParameterRule( + name="seed", + type=ParameterType.INT, + default=1234, + label=I18nObject(zh_Hans="随机种子", en_US="Random Seed"), + ), + ParameterRule( + 
name="repetition_penalty", + type=ParameterType.FLOAT, + default=1.1, + label=I18nObject(zh_Hans="重复惩罚", en_US="Repetition Penalty"), + ), + ] + + entity = AIModelEntity( + model=model, + label=I18nObject(en_US=model), + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_type=ModelType.LLM, + model_properties={}, + parameter_rules=rules, + ) + + return entity diff --git a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml index de2c289c94..fabe6d90e6 100644 --- a/api/core/model_runtime/model_providers/tongyi/tongyi.yaml +++ b/api/core/model_runtime/model_providers/tongyi/tongyi.yaml @@ -20,6 +20,7 @@ supported_model_types: - text-embedding configurate_methods: - predefined-model + - customizable-model provider_credential_schema: credential_form_schemas: - variable: dashscope_api_key @@ -30,3 +31,20 @@ provider_credential_schema: placeholder: zh_Hans: 在此输入您的 API Key en_US: Enter your API Key +model_credential_schema: + model: + label: + en_US: Model Name + zh_Hans: 模型名称 + placeholder: + en_US: Enter full model name + zh_Hans: 输入模型全称 + credential_form_schemas: + - variable: dashscope_api_key + required: true + label: + en_US: API Key + type: secret-input + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key From 831c5a93af83058da71f56c2df1d634ee768e001 Mon Sep 17 00:00:00 2001 From: zhuhao <37029601+hwzhuhao@users.noreply.github.com> Date: Sat, 21 Sep 2024 22:56:37 +0800 Subject: [PATCH 22/40] refactor(ops): Optimize the iteration for filter_none_values and use logging.error to record logs when an exception occurs (#8461) --- api/core/ops/entities/trace_entity.py | 3 +-- api/core/ops/ops_trace_manager.py | 4 ++-- api/core/ops/utils.py | 7 +++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/api/core/ops/entities/trace_entity.py b/api/core/ops/entities/trace_entity.py index f27a0af6e0..db6ce9d9c3 100644 --- a/api/core/ops/entities/trace_entity.py +++ b/api/core/ops/entities/trace_entity.py @@ -21,8 +21,7 @@ class BaseTraceInfo(BaseModel): return None if isinstance(v, str | dict | list): return v - else: - return "" + return "" class WorkflowTraceInfo(BaseTraceInfo): diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 876790f5b8..0200f4a32d 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -708,7 +708,7 @@ class TraceQueueManager: trace_task.app_id = self.app_id trace_manager_queue.put(trace_task) except Exception as e: - logging.debug(f"Error adding trace task: {e}") + logging.error(f"Error adding trace task: {e}") finally: self.start_timer() @@ -727,7 +727,7 @@ class TraceQueueManager: if tasks: self.send_to_celery(tasks) except Exception as e: - logging.debug(f"Error processing trace tasks: {e}") + logging.error(f"Error processing trace tasks: {e}") def start_timer(self): global trace_manager_timer diff --git a/api/core/ops/utils.py b/api/core/ops/utils.py index 498685b342..3cd3fb5756 100644 --- a/api/core/ops/utils.py +++ b/api/core/ops/utils.py @@ -6,12 +6,15 @@ from models.model import Message def filter_none_values(data: dict): + new_data = {} for key, value in data.items(): if value is None: continue if isinstance(value, datetime): - data[key] = value.isoformat() - return {key: value for key, value in data.items() if value is not None} + new_data[key] = value.isoformat() + else: + new_data[key] = value + return new_data def get_message_data(message_id): From b32a7713e024d63d327303b3749d9380fbaa592e Mon Sep 17 
00:00:00 2001 From: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Date: Sat, 21 Sep 2024 23:59:50 +0800 Subject: [PATCH 23/40] feat: update pyproject.toml (#8368) --- api/poetry.lock | 100 ++++++++++++++++++++++++--------------------- api/pyproject.toml | 2 +- 2 files changed, 54 insertions(+), 48 deletions(-) diff --git a/api/poetry.lock b/api/poetry.lock index 28c688cc9c..8d4d680031 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -616,13 +616,13 @@ files = [ [[package]] name = "azure-core" -version = "1.30.2" +version = "1.31.0" description = "Microsoft Azure Core Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-core-1.30.2.tar.gz", hash = "sha256:a14dc210efcd608821aa472d9fb8e8d035d29b68993819147bc290a8ac224472"}, - {file = "azure_core-1.30.2-py3-none-any.whl", hash = "sha256:cf019c1ca832e96274ae85abd3d9f752397194d9fea3b41487290562ac8abe4a"}, + {file = "azure_core-1.31.0-py3-none-any.whl", hash = "sha256:22954de3777e0250029360ef31d80448ef1be13b80a459bff80ba7073379e2cd"}, + {file = "azure_core-1.31.0.tar.gz", hash = "sha256:656a0dd61e1869b1506b7c6a3b31d62f15984b1a573d6326f6aa2f3e4123284b"}, ] [package.dependencies] @@ -828,13 +828,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.17" +version = "1.35.19" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.17-py3-none-any.whl", hash = "sha256:a93f773ca93139529b5d36730b382dbee63ab4c7f26129aa5c84835255ca999d"}, - {file = "botocore-1.35.17.tar.gz", hash = "sha256:0d35d03ea647b5d464c7f77bdab6fb23ae5d49752b13cf97ab84444518c7b1bd"}, + {file = "botocore-1.35.19-py3-none-any.whl", hash = "sha256:c83f7f0cacfe7c19b109b363ebfa8736e570d24922f16ed371681f58ebab44a9"}, + {file = "botocore-1.35.19.tar.gz", hash = "sha256:42d6d8db7250cbd7899f786f9861e02cab17dc238f64d6acb976098ed9809625"}, ] [package.dependencies] @@ -2429,13 +2429,13 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.114.1" +version = "0.114.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.114.1-py3-none-any.whl", hash = "sha256:5d4746f6e4b7dff0b4f6b6c6d5445645285f662fe75886e99af7ee2d6b58bb3e"}, - {file = "fastapi-0.114.1.tar.gz", hash = "sha256:1d7bbbeabbaae0acb0c22f0ab0b040f642d3093ca3645f8c876b6f91391861d8"}, + {file = "fastapi-0.114.2-py3-none-any.whl", hash = "sha256:44474a22913057b1acb973ab90f4b671ba5200482e7622816d79105dcece1ac5"}, + {file = "fastapi-0.114.2.tar.gz", hash = "sha256:0adb148b62edb09e8c6eeefa3ea934e8f276dabc038c5a82989ea6346050c3da"}, ] [package.dependencies] @@ -3990,15 +3990,18 @@ files = [ [[package]] name = "idna" -version = "3.8" +version = "3.9" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" files = [ - {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"}, - {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"}, + {file = "idna-3.9-py3-none-any.whl", hash = "sha256:69297d5da0cc9281c77efffb4e730254dd45943f45bbfb461de5991713989b1e"}, + {file = "idna-3.9.tar.gz", hash = "sha256:e5c5dafde284f26e9e0f28f6ea2d6400abd5ca099864a67f576f3981c6476124"}, ] +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + 
[[package]] name = "importlib-metadata" version = "6.11.0" @@ -4393,13 +4396,13 @@ six = "*" [[package]] name = "langfuse" -version = "2.48.0" +version = "2.48.1" description = "A client library for accessing langfuse" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langfuse-2.48.0-py3-none-any.whl", hash = "sha256:475b047e461f8a45e3c7d81b6a87e0b9e389c489d465b838aa69cbdd16eeacce"}, - {file = "langfuse-2.48.0.tar.gz", hash = "sha256:46e7e6e6e97fe03115a9f95d7f29b3fcd1848a9d1bb34608ebb42a3931919e45"}, + {file = "langfuse-2.48.1-py3-none-any.whl", hash = "sha256:8661070b6d94ba1d7da92c054f3110b6ecf4489d6e8204a4080f934f3f49ebf2"}, + {file = "langfuse-2.48.1.tar.gz", hash = "sha256:b8117d90babec6be1bc3303b42e0b71848531eae44118e6e0123d03e7961d0fc"}, ] [package.dependencies] @@ -4418,13 +4421,13 @@ openai = ["openai (>=0.27.8)"] [[package]] name = "langsmith" -version = "0.1.118" +version = "0.1.120" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.118-py3-none-any.whl", hash = "sha256:f017127b3efb037da5e46ff4f8583e8192e7955191737240c327f3eadc144d7c"}, - {file = "langsmith-0.1.118.tar.gz", hash = "sha256:ff1ca06c92c6081250244ebbce5d0bb347b9d898d2e9b60a13b11f0f0720f09f"}, + {file = "langsmith-0.1.120-py3-none-any.whl", hash = "sha256:54d2785e301646c0988e0a69ebe4d976488c87b41928b358cb153b6ddd8db62b"}, + {file = "langsmith-0.1.120.tar.gz", hash = "sha256:25499ca187b41bd89d784b272b97a8d76f60e0e21bdf20336e8a2aa6a9b23ac9"}, ] [package.dependencies] @@ -6232,13 +6235,13 @@ xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.3.2" +version = "4.3.3" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.3.2-py3-none-any.whl", hash = "sha256:eb1c8582560b34ed4ba105009a4badf7f6f85768b30126f351328507b2beb617"}, - {file = "platformdirs-4.3.2.tar.gz", hash = "sha256:9e5e27a08aa095dd127b9f2e764d74254f482fef22b0970773bfba79d091ab8c"}, + {file = "platformdirs-4.3.3-py3-none-any.whl", hash = "sha256:50a5450e2e84f44539718293cbb1da0a0885c9d14adf21b77bae4e66fc99d9b5"}, + {file = "platformdirs-4.3.3.tar.gz", hash = "sha256:d4e0b7d8ec176b341fb03cb11ca12d0276faa8c485f9cd218f613840463fc2c0"}, ] [package.extras] @@ -6248,13 +6251,13 @@ type = ["mypy (>=1.11.2)"] [[package]] name = "plotly" -version = "5.24.0" +version = "5.24.1" description = "An open-source, interactive data visualization library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "plotly-5.24.0-py3-none-any.whl", hash = "sha256:0e54efe52c8cef899f7daa41be9ed97dfb6be622613a2a8f56a86a0634b2b67e"}, - {file = "plotly-5.24.0.tar.gz", hash = "sha256:eae9f4f54448682442c92c1e97148e3ad0c52f0cf86306e1b76daba24add554a"}, + {file = "plotly-5.24.1-py3-none-any.whl", hash = "sha256:f67073a1e637eb0dc3e46324d9d51e2fe76e9727c892dde64ddf1e1b51f29089"}, + {file = "plotly-5.24.1.tar.gz", hash = "sha256:dbc8ac8339d248a4bcc36e08a5659bacfe1b079390b8953533f4eb22169b4bae"}, ] [package.dependencies] @@ -7025,15 +7028,18 @@ files = [ [[package]] name = "pyreadline3" -version = "3.4.3" +version = "3.5.2" description = "A python implementation of GNU readline." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "pyreadline3-3.4.3-py3-none-any.whl", hash = "sha256:f832c5898f4f9a0f81d48a8c499b39d0179de1a465ea3def1a7e7231840b4ed6"}, - {file = "pyreadline3-3.4.3.tar.gz", hash = "sha256:ebab0baca37f50e2faa1dd99a6da1c75de60e0d68a3b229c134bbd12786250e2"}, + {file = "pyreadline3-3.5.2-py3-none-any.whl", hash = "sha256:a87d56791e2965b2b187e2ea33dcf664600842c997c0623c95cf8ef07db83de9"}, + {file = "pyreadline3-3.5.2.tar.gz", hash = "sha256:ba82292e52c5a3bb256b291af0c40b457c1e8699cac9a873abbcaac8aef3a1bb"}, ] +[package.extras] +dev = ["build", "flake8", "pytest", "twine"] + [[package]] name = "pytest" version = "8.3.3" @@ -8778,13 +8784,13 @@ test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "tencentcloud-sdk-python-common" -version = "3.0.1230" +version = "3.0.1231" description = "Tencent Cloud Common SDK for Python" optional = false python-versions = "*" files = [ - {file = "tencentcloud-sdk-python-common-3.0.1230.tar.gz", hash = "sha256:1e0f3bab80026fcb0083820869239b3f8cf30beb8e00e12c213bdecc75eb7577"}, - {file = "tencentcloud_sdk_python_common-3.0.1230-py2.py3-none-any.whl", hash = "sha256:03616c79685c154c689536a9c823d52b855cf49eada70679826a92aff5afd596"}, + {file = "tencentcloud-sdk-python-common-3.0.1231.tar.gz", hash = "sha256:22aa281ca2eac511e1615b2953da7c4a0bec87cf93a05a7a15dbb61b23a215ee"}, + {file = "tencentcloud_sdk_python_common-3.0.1231-py2.py3-none-any.whl", hash = "sha256:bd0f7c4df4b156ec35c8731afa1f498043c7e1cd5d2feb595ee441fdb45a061e"}, ] [package.dependencies] @@ -8792,17 +8798,17 @@ requests = ">=2.16.0" [[package]] name = "tencentcloud-sdk-python-hunyuan" -version = "3.0.1230" +version = "3.0.1231" description = "Tencent Cloud Hunyuan SDK for Python" optional = false python-versions = "*" files = [ - {file = "tencentcloud-sdk-python-hunyuan-3.0.1230.tar.gz", hash = "sha256:900d15cb9dc2217b1282d985898ec7ecf97859351c86c6f7efc74685f08a5f85"}, - {file = "tencentcloud_sdk_python_hunyuan-3.0.1230-py2.py3-none-any.whl", hash = "sha256:604dab0d4d66ea942f23d7980c76b5f0f6af3d68a8374e619331a4dd2910991e"}, + {file = "tencentcloud-sdk-python-hunyuan-3.0.1231.tar.gz", hash = "sha256:6da12f418f14305b3a6b7bb29b6d95bf4038a6b66b81c0e03b8dafc4f6df99ca"}, + {file = "tencentcloud_sdk_python_hunyuan-3.0.1231-py2.py3-none-any.whl", hash = "sha256:21ba28f69c34c15e20900be3f2c06376fcaf7e58265f939833c55631f2348792"}, ] [package.dependencies] -tencentcloud-sdk-python-common = "3.0.1230" +tencentcloud-sdk-python-common = "3.0.1231" [[package]] name = "threadpoolctl" @@ -9205,13 +9211,13 @@ typing-extensions = ">=3.7.4.3" [[package]] name = "types-requests" -version = "2.32.0.20240907" +version = "2.32.0.20240914" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" files = [ - {file = "types-requests-2.32.0.20240907.tar.gz", hash = "sha256:ff33935f061b5e81ec87997e91050f7b4af4f82027a7a7a9d9aaea04a963fdf8"}, - {file = "types_requests-2.32.0.20240907-py3-none-any.whl", hash = "sha256:1d1e79faeaf9d42def77f3c304893dea17a97cae98168ac69f3cb465516ee8da"}, + {file = "types-requests-2.32.0.20240914.tar.gz", hash = "sha256:2850e178db3919d9bf809e434eef65ba49d0e7e33ac92d588f4a5e295fffd405"}, + {file = "types_requests-2.32.0.20240914-py3-none-any.whl", hash = "sha256:59c2f673eb55f32a99b2894faf6020e1a9f4a402ad0f192bfee0b64469054310"}, ] [package.dependencies] @@ -9454,13 +9460,13 @@ files = [ [[package]] name = "urllib3" -version = "2.2.2" +version = "2.2.3" description = "HTTP library with 
thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, - {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, + {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"}, + {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"}, ] [package.extras] @@ -9614,12 +9620,12 @@ files = [ [[package]] name = "volcengine-python-sdk" -version = "1.0.100" +version = "1.0.101" description = "Volcengine SDK for Python" optional = false python-versions = "*" files = [ - {file = "volcengine-python-sdk-1.0.100.tar.gz", hash = "sha256:cdc194fe3ce51adda6892d2ca1c43edba3300699321dc6c69119c59fc3b28932"}, + {file = "volcengine-python-sdk-1.0.101.tar.gz", hash = "sha256:1b76e71a6dcf3d5be1b2c058e7d281359e6cca2cc920ffe2567d3115beea1d02"}, ] [package.dependencies] @@ -10215,13 +10221,13 @@ requests = "*" [[package]] name = "zipp" -version = "3.20.1" +version = "3.20.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" files = [ - {file = "zipp-3.20.1-py3-none-any.whl", hash = "sha256:9960cd8967c8f85a56f920d5d507274e74f9ff813a0ab8889a5b5be2daf44064"}, - {file = "zipp-3.20.1.tar.gz", hash = "sha256:c22b14cc4763c5a5b04134207736c107db42e9d3ef2d9779d465f5f1bcba572b"}, + {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, + {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, ] [package.extras] @@ -10416,4 +10422,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "9173a56b2efea12804c980511e1465fba43c7a3d83b1ad284ee149851ed67fc5" +content-hash = "18924ae12a00bde4438a46168bc167ed69613ab1ab0c387f193cd47ac24379b2" diff --git a/api/pyproject.toml b/api/pyproject.toml index 8c10f1dad9..829c39a12b 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -168,7 +168,7 @@ gunicorn = "~22.0.0" httpx = { version = "~0.27.0", extras = ["socks"] } huggingface-hub = "~0.16.4" jieba = "0.42.1" -langfuse = "^2.36.1" +langfuse = "^2.48.0" langsmith = "^0.1.77" mailchimp-transactional = "~1.0.50" markdown = "~3.5.1" From 8c51d06222d5ada18bab11c4777b7e7feb644fbd Mon Sep 17 00:00:00 2001 From: Hash Brown Date: Sun, 22 Sep 2024 03:15:11 +0800 Subject: [PATCH 24/40] feat: regenerate in `Chat`, `agent` and `Chatflow` app (#7661) --- api/constants/__init__.py | 1 + api/controllers/console/app/completion.py | 1 + api/controllers/console/app/message.py | 2 - api/controllers/console/app/workflow.py | 2 + api/controllers/console/explore/completion.py | 1 + api/controllers/console/explore/message.py | 2 +- api/controllers/service_api/app/message.py | 1 + api/controllers/web/completion.py | 1 + api/controllers/web/message.py | 3 +- api/core/agent/base_agent_runner.py | 5 +- .../app/apps/advanced_chat/app_generator.py | 1 + api/core/app/apps/agent_chat/app_generator.py | 1 + api/core/app/apps/chat/app_generator.py | 1 + .../app/apps/message_based_app_generator.py | 1 + api/core/app/entities/app_invoke_entities.py | 3 + api/core/memory/token_buffer_memory.py | 21 ++- .../prompt/utils/extract_thread_messages.py | 22 +++ 
api/fields/conversation_fields.py | 1 + api/fields/message_fields.py | 1 + ...bb251_add_parent_message_id_to_messages.py | 36 +++++ api/models/model.py | 1 + api/services/message_service.py | 4 +- .../prompt/test_extract_thread_messages.py | 91 +++++++++++ .../debug-with-multiple-model/chat-item.tsx | 4 +- .../debug/debug-with-single-model/index.tsx | 26 ++- web/app/components/app/log/list.tsx | 153 ++++++++++-------- .../chat/chat-with-history/chat-wrapper.tsx | 25 ++- .../base/chat/chat-with-history/hooks.tsx | 35 +--- .../base/chat/chat/answer/index.tsx | 3 + .../base/chat/chat/answer/operation.tsx | 13 +- web/app/components/base/chat/chat/context.tsx | 3 + web/app/components/base/chat/chat/hooks.ts | 3 +- web/app/components/base/chat/chat/index.tsx | 5 + web/app/components/base/chat/chat/type.ts | 1 + web/app/components/base/chat/constants.ts | 1 + .../chat/embedded-chatbot/chat-wrapper.tsx | 25 ++- .../base/chat/embedded-chatbot/hooks.tsx | 38 +---- web/app/components/base/chat/types.ts | 4 +- web/app/components/base/chat/utils.ts | 57 ++++++- .../assets/vender/line/general/refresh.svg | 1 + .../src/vender/line/general/Refresh.json | 23 +++ .../icons/src/vender/line/general/Refresh.tsx | 16 ++ .../icons/src/vender/line/general/index.ts | 1 + .../components/base/regenerate-btn/index.tsx | 31 ++++ .../workflow/panel/chat-record/index.tsx | 79 +++++---- .../panel/debug-and-preview/chat-wrapper.tsx | 26 ++- .../workflow/panel/debug-and-preview/hooks.ts | 2 + web/hooks/use-app-favicon.ts | 6 +- web/i18n/en-US/app-api.ts | 1 + web/i18n/zh-Hans/app-api.ts | 1 + web/models/log.ts | 1 + 51 files changed, 606 insertions(+), 181 deletions(-) create mode 100644 api/core/prompt/utils/extract_thread_messages.py create mode 100644 api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py create mode 100644 api/tests/unit_tests/core/prompt/test_extract_thread_messages.py create mode 100644 web/app/components/base/icons/assets/vender/line/general/refresh.svg create mode 100644 web/app/components/base/icons/src/vender/line/general/Refresh.json create mode 100644 web/app/components/base/icons/src/vender/line/general/Refresh.tsx create mode 100644 web/app/components/base/regenerate-btn/index.tsx diff --git a/api/constants/__init__.py b/api/constants/__init__.py index e22c3268ef..75eaf81638 100644 --- a/api/constants/__init__.py +++ b/api/constants/__init__.py @@ -1 +1,2 @@ HIDDEN_VALUE = "[__HIDDEN__]" +UUID_NIL = "00000000-0000-0000-0000-000000000000" diff --git a/api/controllers/console/app/completion.py b/api/controllers/console/app/completion.py index 53de51c24d..d3296d3dff 100644 --- a/api/controllers/console/app/completion.py +++ b/api/controllers/console/app/completion.py @@ -109,6 +109,7 @@ class ChatMessageApi(Resource): parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("model_config", type=dict, required=True, location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json") args = parser.parse_args() diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py index fe06201982..2fba3e0af0 100644 --- a/api/controllers/console/app/message.py +++ 
b/api/controllers/console/app/message.py @@ -105,8 +105,6 @@ class ChatMessageListApi(Resource): if rest_count > 0: has_more = True - history_messages = list(reversed(history_messages)) - return InfiniteScrollPagination(data=history_messages, limit=args["limit"], has_more=has_more) diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index b488deb89d..0a693b84e2 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -166,6 +166,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource): parser.add_argument("query", type=str, required=True, location="json", default="") parser.add_argument("files", type=list, location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") + args = parser.parse_args() try: diff --git a/api/controllers/console/explore/completion.py b/api/controllers/console/explore/completion.py index f464692098..125bc1af8c 100644 --- a/api/controllers/console/explore/completion.py +++ b/api/controllers/console/explore/completion.py @@ -100,6 +100,7 @@ class ChatApi(InstalledAppResource): parser.add_argument("query", type=str, required=True, location="json") parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json") args = parser.parse_args() diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index 0e0238556c..3d221ff30a 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -51,7 +51,7 @@ class MessageListApi(InstalledAppResource): try: return MessageService.pagination_by_first_id( - app_model, current_user, args["conversation_id"], args["first_id"], args["limit"] + app_model, current_user, args["conversation_id"], args["first_id"], args["limit"], "desc" ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index e54e6f4903..a70ee89b5e 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -54,6 +54,7 @@ class MessageListApi(Resource): message_fields = { "id": fields.String, "conversation_id": fields.String, + "parent_message_id": fields.String, "inputs": fields.Raw, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), diff --git a/api/controllers/web/completion.py b/api/controllers/web/completion.py index 115492b796..45b890dfc4 100644 --- a/api/controllers/web/completion.py +++ b/api/controllers/web/completion.py @@ -96,6 +96,7 @@ class ChatApi(WebApiResource): parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json") parser.add_argument("conversation_id", type=uuid_value, location="json") + parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json") parser.add_argument("retriever_from", type=str, required=False, default="web_app", location="json") args = parser.parse_args() diff --git a/api/controllers/web/message.py 
b/api/controllers/web/message.py index 0d4047f4ef..2d2a5866c8 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -57,6 +57,7 @@ class MessageListApi(WebApiResource): message_fields = { "id": fields.String, "conversation_id": fields.String, + "parent_message_id": fields.String, "inputs": fields.Raw, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), @@ -89,7 +90,7 @@ class MessageListApi(WebApiResource): try: return MessageService.pagination_by_first_id( - app_model, end_user, args["conversation_id"], args["first_id"], args["limit"] + app_model, end_user, args["conversation_id"], args["first_id"], args["limit"], "desc" ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index d09a9956a4..5295f97bdb 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -32,6 +32,7 @@ from core.model_runtime.entities.message_entities import ( from core.model_runtime.entities.model_entities import ModelFeature from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.model_runtime.utils.encoders import jsonable_encoder +from core.prompt.utils.extract_thread_messages import extract_thread_messages from core.tools.entities.tool_entities import ( ToolParameter, ToolRuntimeVariablePool, @@ -441,10 +442,12 @@ class BaseAgentRunner(AppRunner): .filter( Message.conversation_id == self.message.conversation_id, ) - .order_by(Message.created_at.asc()) + .order_by(Message.created_at.desc()) .all() ) + messages = list(reversed(extract_thread_messages(messages))) + for message in messages: if message.id == self.message.id: continue diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 88e1256ed5..445ef6d0ab 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -121,6 +121,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config), query=query, files=file_objs, + parent_message_id=args.get("parent_message_id"), user_id=user.id, stream=stream, invoke_from=invoke_from, diff --git a/api/core/app/apps/agent_chat/app_generator.py b/api/core/app/apps/agent_chat/app_generator.py index abf8a332ab..99abccf4f9 100644 --- a/api/core/app/apps/agent_chat/app_generator.py +++ b/api/core/app/apps/agent_chat/app_generator.py @@ -127,6 +127,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator): inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config), query=query, files=file_objs, + parent_message_id=args.get("parent_message_id"), user_id=user.id, stream=stream, invoke_from=invoke_from, diff --git a/api/core/app/apps/chat/app_generator.py b/api/core/app/apps/chat/app_generator.py index 032556ec4c..9ef1366a0f 100644 --- a/api/core/app/apps/chat/app_generator.py +++ b/api/core/app/apps/chat/app_generator.py @@ -128,6 +128,7 @@ class ChatAppGenerator(MessageBasedAppGenerator): inputs=conversation.inputs if conversation else self._get_cleaned_inputs(inputs, app_config), query=query, files=file_objs, + parent_message_id=args.get("parent_message_id"), user_id=user.id, stream=stream, invoke_from=invoke_from, diff --git a/api/core/app/apps/message_based_app_generator.py 
b/api/core/app/apps/message_based_app_generator.py index c4db95cbd0..65b759acf5 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -218,6 +218,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): answer_tokens=0, answer_unit_price=0, answer_price_unit=0, + parent_message_id=getattr(application_generate_entity, "parent_message_id", None), provider_response_latency=0, total_price=0, currency="USD", diff --git a/api/core/app/entities/app_invoke_entities.py b/api/core/app/entities/app_invoke_entities.py index ab8d4e374e..87ca51ef1b 100644 --- a/api/core/app/entities/app_invoke_entities.py +++ b/api/core/app/entities/app_invoke_entities.py @@ -122,6 +122,7 @@ class ChatAppGenerateEntity(EasyUIBasedAppGenerateEntity): """ conversation_id: Optional[str] = None + parent_message_id: Optional[str] = None class CompletionAppGenerateEntity(EasyUIBasedAppGenerateEntity): @@ -138,6 +139,7 @@ class AgentChatAppGenerateEntity(EasyUIBasedAppGenerateEntity): """ conversation_id: Optional[str] = None + parent_message_id: Optional[str] = None class AdvancedChatAppGenerateEntity(AppGenerateEntity): @@ -149,6 +151,7 @@ class AdvancedChatAppGenerateEntity(AppGenerateEntity): app_config: WorkflowUIBasedAppConfig conversation_id: Optional[str] = None + parent_message_id: Optional[str] = None query: str class SingleIterationRunEntity(BaseModel): diff --git a/api/core/memory/token_buffer_memory.py b/api/core/memory/token_buffer_memory.py index d3185c3b11..60b36c50f0 100644 --- a/api/core/memory/token_buffer_memory.py +++ b/api/core/memory/token_buffer_memory.py @@ -11,6 +11,7 @@ from core.model_runtime.entities.message_entities import ( TextPromptMessageContent, UserPromptMessage, ) +from core.prompt.utils.extract_thread_messages import extract_thread_messages from extensions.ext_database import db from models.model import AppMode, Conversation, Message, MessageFile from models.workflow import WorkflowRun @@ -33,8 +34,17 @@ class TokenBufferMemory: # fetch limited messages, and return reversed query = ( - db.session.query(Message.id, Message.query, Message.answer, Message.created_at, Message.workflow_run_id) - .filter(Message.conversation_id == self.conversation.id, Message.answer != "") + db.session.query( + Message.id, + Message.query, + Message.answer, + Message.created_at, + Message.workflow_run_id, + Message.parent_message_id, + ) + .filter( + Message.conversation_id == self.conversation.id, + ) .order_by(Message.created_at.desc()) ) @@ -45,7 +55,12 @@ class TokenBufferMemory: messages = query.limit(message_limit).all() - messages = list(reversed(messages)) + # instead of all messages from the conversation, we only need to extract messages + # that belong to the thread of last message + thread_messages = extract_thread_messages(messages) + thread_messages.pop(0) + messages = list(reversed(thread_messages)) + message_file_parser = MessageFileParser(tenant_id=app_record.tenant_id, app_id=app_record.id) prompt_messages = [] for message in messages: diff --git a/api/core/prompt/utils/extract_thread_messages.py b/api/core/prompt/utils/extract_thread_messages.py new file mode 100644 index 0000000000..e8b626499f --- /dev/null +++ b/api/core/prompt/utils/extract_thread_messages.py @@ -0,0 +1,22 @@ +from constants import UUID_NIL + + +def extract_thread_messages(messages: list[dict]) -> list[dict]: + thread_messages = [] + next_message = None + + for message in messages: + if not message.parent_message_id: + # If the message is regenerated and does 
not have a parent message, it is the start of a new thread + thread_messages.append(message) + break + + if not next_message: + thread_messages.append(message) + next_message = message.parent_message_id + else: + if next_message in {message.id, UUID_NIL}: + thread_messages.append(message) + next_message = message.parent_message_id + + return thread_messages diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py index 9207314fc2..3dcd88d1de 100644 --- a/api/fields/conversation_fields.py +++ b/api/fields/conversation_fields.py @@ -75,6 +75,7 @@ message_detail_fields = { "metadata": fields.Raw(attribute="message_metadata_dict"), "status": fields.String, "error": fields.String, + "parent_message_id": fields.String, } feedback_stat_fields = {"like": fields.Integer, "dislike": fields.Integer} diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py index 3d2df87afb..c938097131 100644 --- a/api/fields/message_fields.py +++ b/api/fields/message_fields.py @@ -62,6 +62,7 @@ retriever_resource_fields = { message_fields = { "id": fields.String, "conversation_id": fields.String, + "parent_message_id": fields.String, "inputs": fields.Raw, "query": fields.String, "answer": fields.String(attribute="re_sign_file_url_answer"), diff --git a/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py new file mode 100644 index 0000000000..fd957eeafb --- /dev/null +++ b/api/migrations/versions/2024_09_11_1012-d57ba9ebb251_add_parent_message_id_to_messages.py @@ -0,0 +1,36 @@ +"""add parent_message_id to messages + +Revision ID: d57ba9ebb251 +Revises: 675b5321501b +Create Date: 2024-09-11 10:12:45.826265 + +""" +import sqlalchemy as sa +from alembic import op + +import models as models + +# revision identifiers, used by Alembic. +revision = 'd57ba9ebb251' +down_revision = '675b5321501b' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.add_column(sa.Column('parent_message_id', models.types.StringUUID(), nullable=True)) + + # Set parent_message_id for existing messages to uuid_nil() to distinguish them from new messages with actual parent IDs or NULLs + op.execute('UPDATE messages SET parent_message_id = uuid_nil() WHERE parent_message_id IS NULL') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.drop_column('parent_message_id') + + # ### end Alembic commands ### diff --git a/api/models/model.py b/api/models/model.py index ae0bc3210b..53940a5a16 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -710,6 +710,7 @@ class Message(db.Model): answer_tokens = db.Column(db.Integer, nullable=False, server_default=db.text("0")) answer_unit_price = db.Column(db.Numeric(10, 4), nullable=False) answer_price_unit = db.Column(db.Numeric(10, 7), nullable=False, server_default=db.text("0.001")) + parent_message_id = db.Column(StringUUID, nullable=True) provider_response_latency = db.Column(db.Float, nullable=False, server_default=db.text("0")) total_price = db.Column(db.Numeric(10, 7)) currency = db.Column(db.String(255), nullable=False) diff --git a/api/services/message_service.py b/api/services/message_service.py index ecb121c36e..f432a77c80 100644 --- a/api/services/message_service.py +++ b/api/services/message_service.py @@ -34,6 +34,7 @@ class MessageService: conversation_id: str, first_id: Optional[str], limit: int, + order: str = "asc", ) -> InfiniteScrollPagination: if not user: return InfiniteScrollPagination(data=[], limit=limit, has_more=False) @@ -91,7 +92,8 @@ class MessageService: if rest_count > 0: has_more = True - history_messages = list(reversed(history_messages)) + if order == "asc": + history_messages = list(reversed(history_messages)) return InfiniteScrollPagination(data=history_messages, limit=limit, has_more=has_more) diff --git a/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py new file mode 100644 index 0000000000..ba3c1eb5e0 --- /dev/null +++ b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py @@ -0,0 +1,91 @@ +from uuid import uuid4 + +from constants import UUID_NIL +from core.prompt.utils.extract_thread_messages import extract_thread_messages + + +class TestMessage: + def __init__(self, id, parent_message_id): + self.id = id + self.parent_message_id = parent_message_id + + def __getitem__(self, item): + return getattr(self, item) + + +def test_extract_thread_messages_single_message(): + messages = [TestMessage(str(uuid4()), UUID_NIL)] + result = extract_thread_messages(messages) + assert len(result) == 1 + assert result[0] == messages[0] + + +def test_extract_thread_messages_linear_thread(): + id1, id2, id3, id4, id5 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id5, id4), + TestMessage(id4, id3), + TestMessage(id3, id2), + TestMessage(id2, id1), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 5 + assert [msg["id"] for msg in result] == [id5, id4, id3, id2, id1] + + +def test_extract_thread_messages_branched_thread(): + id1, id2, id3, id4 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id4, id2), + TestMessage(id3, id2), + TestMessage(id2, id1), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 3 + assert [msg["id"] for msg in result] == [id4, id2, id1] + + +def test_extract_thread_messages_empty_list(): + messages = [] + result = extract_thread_messages(messages) + assert len(result) == 0 + + +def test_extract_thread_messages_partially_loaded(): + id0, id1, id2, id3 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id3, id2), + TestMessage(id2, id1), + 
TestMessage(id1, id0), + ] + result = extract_thread_messages(messages) + assert len(result) == 3 + assert [msg["id"] for msg in result] == [id3, id2, id1] + + +def test_extract_thread_messages_legacy_messages(): + id1, id2, id3 = str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id3, UUID_NIL), + TestMessage(id2, UUID_NIL), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 3 + assert [msg["id"] for msg in result] == [id3, id2, id1] + + +def test_extract_thread_messages_mixed_with_legacy_messages(): + id1, id2, id3, id4, id5 = str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()), str(uuid4()) + messages = [ + TestMessage(id5, id4), + TestMessage(id4, id2), + TestMessage(id3, id2), + TestMessage(id2, UUID_NIL), + TestMessage(id1, UUID_NIL), + ] + result = extract_thread_messages(messages) + assert len(result) == 4 + assert [msg["id"] for msg in result] == [id5, id4, id2, id1] diff --git a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx index 80dfb5c534..1c70f4fe77 100644 --- a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx +++ b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx @@ -46,6 +46,7 @@ const ChatItem: FC = ({ const config = useConfigFromDebugContext() const { chatList, + chatListRef, isResponding, handleSend, suggestedQuestions, @@ -80,6 +81,7 @@ const ChatItem: FC = ({ query: message, inputs, model_config: configData, + parent_message_id: chatListRef.current.at(-1)?.id || null, } if (visionConfig.enabled && files?.length && supportVision) @@ -93,7 +95,7 @@ const ChatItem: FC = ({ onGetSuggestedQuestions: (responseItemId, getAbortController) => fetchSuggestedQuestions(appId, responseItemId, getAbortController), }, ) - }, [appId, config, handleSend, inputs, modelAndParameter, textGenerationModelList, visionConfig.enabled]) + }, [appId, config, handleSend, inputs, modelAndParameter, textGenerationModelList, visionConfig.enabled, chatListRef]) const { eventEmitter } = useEventEmitterContextContext() eventEmitter?.useSubscription((v: any) => { diff --git a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx index d93ad00659..80e7c98a8f 100644 --- a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx +++ b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx @@ -12,7 +12,7 @@ import { import Chat from '@/app/components/base/chat/chat' import { useChat } from '@/app/components/base/chat/chat/hooks' import { useDebugConfigurationContext } from '@/context/debug-configuration' -import type { OnSend } from '@/app/components/base/chat/types' +import type { ChatItem, OnSend } from '@/app/components/base/chat/types' import { useProviderContext } from '@/context/provider-context' import { fetchConversationMessages, @@ -45,10 +45,12 @@ const DebugWithSingleModel = forwardRef { + const doSend: OnSend = useCallback((message, files, last_answer) => { if (checkCanSend && !checkCanSend()) return const currentProvider = textGenerationModelList.find(item => item.provider === modelConfig.provider) @@ -85,6 +87,7 @@ const DebugWithSingleModel = forwardRef fetchSuggestedQuestions(appId, responseItemId, getAbortController), }, ) - }, [appId, checkCanSend, completionParams, config, handleSend, 
inputs, modelConfig, textGenerationModelList, visionConfig.enabled]) + }, [chatListRef, appId, checkCanSend, completionParams, config, handleSend, inputs, modelConfig, textGenerationModelList, visionConfig.enabled]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) const allToolIcons = useMemo(() => { const icons: Record = {} @@ -123,6 +142,7 @@ const DebugWithSingleModel = forwardRef} diff --git a/web/app/components/app/log/list.tsx b/web/app/components/app/log/list.tsx index caec10c4f7..149e877fa4 100644 --- a/web/app/components/app/log/list.tsx +++ b/web/app/components/app/log/list.tsx @@ -16,6 +16,7 @@ import timezone from 'dayjs/plugin/timezone' import { createContext, useContext } from 'use-context-selector' import { useShallow } from 'zustand/react/shallow' import { useTranslation } from 'react-i18next' +import { UUID_NIL } from '../../base/chat/constants' import s from './style.module.css' import VarPanel from './var-panel' import cn from '@/utils/classnames' @@ -81,72 +82,92 @@ const PARAM_MAP = { frequency_penalty: 'Frequency Penalty', } -// Format interface data for easy display +function appendQAToChatList(newChatList: IChatItem[], item: any, conversationId: string, timezone: string, format: string) { + newChatList.push({ + id: item.id, + content: item.answer, + agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), + feedback: item.feedbacks.find((item: any) => item.from_source === 'user'), // user feedback + adminFeedback: item.feedbacks.find((item: any) => item.from_source === 'admin'), // admin feedback + feedbackDisabled: false, + isAnswer: true, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + log: [ + ...item.message, + ...(item.message[item.message.length - 1]?.role !== 'assistant' + ? 
[ + { + role: 'assistant', + text: item.answer, + files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + }, + ] + : []), + ], + workflow_run_id: item.workflow_run_id, + conversationId, + input: { + inputs: item.inputs, + query: item.query, + }, + more: { + time: dayjs.unix(item.created_at).tz(timezone).format(format), + tokens: item.answer_tokens + item.message_tokens, + latency: item.provider_response_latency.toFixed(2), + }, + citation: item.metadata?.retriever_resources, + annotation: (() => { + if (item.annotation_hit_history) { + return { + id: item.annotation_hit_history.annotation_id, + authorName: item.annotation_hit_history.annotation_create_account?.name || 'N/A', + created_at: item.annotation_hit_history.created_at, + } + } + + if (item.annotation) { + return { + id: item.annotation.id, + authorName: item.annotation.account.name, + logAnnotation: item.annotation, + created_at: 0, + } + } + + return undefined + })(), + parentMessageId: `question-${item.id}`, + }) + newChatList.push({ + id: `question-${item.id}`, + content: item.inputs.query || item.inputs.default_input || item.query, // text generation: item.inputs.query; chat: item.query + isAnswer: false, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], + parentMessageId: item.parent_message_id || undefined, + }) +} + const getFormattedChatList = (messages: ChatMessage[], conversationId: string, timezone: string, format: string) => { const newChatList: IChatItem[] = [] - messages.forEach((item: ChatMessage) => { - newChatList.push({ - id: `question-${item.id}`, - content: item.inputs.query || item.inputs.default_input || item.query, // text generation: item.inputs.query; chat: item.query - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - newChatList.push({ - id: item.id, - content: item.answer, - agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), - feedback: item.feedbacks.find(item => item.from_source === 'user'), // user feedback - adminFeedback: item.feedbacks.find(item => item.from_source === 'admin'), // admin feedback - feedbackDisabled: false, - isAnswer: true, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - log: [ - ...item.message, - ...(item.message[item.message.length - 1]?.role !== 'assistant' - ? 
[ - { - role: 'assistant', - text: item.answer, - files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - }, - ] - : []), - ], - workflow_run_id: item.workflow_run_id, - conversationId, - input: { - inputs: item.inputs, - query: item.query, - }, - more: { - time: dayjs.unix(item.created_at).tz(timezone).format(format), - tokens: item.answer_tokens + item.message_tokens, - latency: item.provider_response_latency.toFixed(2), - }, - citation: item.metadata?.retriever_resources, - annotation: (() => { - if (item.annotation_hit_history) { - return { - id: item.annotation_hit_history.annotation_id, - authorName: item.annotation_hit_history.annotation_create_account?.name || 'N/A', - created_at: item.annotation_hit_history.created_at, - } - } + let nextMessageId = null + for (const item of messages) { + if (!item.parent_message_id) { + appendQAToChatList(newChatList, item, conversationId, timezone, format) + break + } - if (item.annotation) { - return { - id: item.annotation.id, - authorName: item.annotation.account.name, - logAnnotation: item.annotation, - created_at: 0, - } - } - - return undefined - })(), - }) - }) - return newChatList + if (!nextMessageId) { + appendQAToChatList(newChatList, item, conversationId, timezone, format) + nextMessageId = item.parent_message_id + } + else { + if (item.id === nextMessageId || nextMessageId === UUID_NIL) { + appendQAToChatList(newChatList, item, conversationId, timezone, format) + nextMessageId = item.parent_message_id + } + } + } + return newChatList.reverse() } // const displayedParams = CompletionParams.slice(0, -2) @@ -171,6 +192,7 @@ function DetailPanel([]) + const fetchedMessages = useRef([]) const [hasMore, setHasMore] = useState(true) const [varValues, setVarValues] = useState>({}) const fetchData = async () => { @@ -192,7 +214,8 @@ function DetailPanel - : items.length < 8 + : (items.length < 8 && !hasMore) ?
{ }, [appParams, currentConversationItem?.introduction, currentConversationId]) const { chatList, + chatListRef, + handleUpdateChatList, handleSend, handleStop, isResponding, @@ -63,11 +66,12 @@ const ChatWrapper = () => { currentChatInstanceRef.current.handleStop = handleStop }, []) - const doSend: OnSend = useCallback((message, files) => { + const doSend: OnSend = useCallback((message, files, last_answer) => { const data: any = { query: message, inputs: currentConversationId ? currentConversationItem?.inputs : newConversationInputs, conversation_id: currentConversationId, + parent_message_id: last_answer?.id || chatListRef.current.at(-1)?.id || null, } if (appConfig?.file_upload?.image.enabled && files?.length) @@ -83,6 +87,7 @@ const ChatWrapper = () => { }, ) }, [ + chatListRef, appConfig, currentConversationId, currentConversationItem, @@ -92,6 +97,23 @@ const ChatWrapper = () => { isInstalledApp, appId, ]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) + const chatNode = useMemo(() => { if (inputsForms.length) { return ( @@ -148,6 +170,7 @@ const ChatWrapper = () => { chatFooterClassName='pb-4' chatFooterInnerClassName={`mx-auto w-full max-w-full ${isMobile && 'px-4'}`} onSend={doSend} + onRegenerate={doRegenerate} onStopResponding={handleStop} chatNode={chatNode} allToolIcons={appMeta?.tool_icons || {}} diff --git a/web/app/components/base/chat/chat-with-history/hooks.tsx b/web/app/components/base/chat/chat-with-history/hooks.tsx index 1e05cc39ef..b9ebc42ec8 100644 --- a/web/app/components/base/chat/chat-with-history/hooks.tsx +++ b/web/app/components/base/chat/chat-with-history/hooks.tsx @@ -12,10 +12,10 @@ import produce from 'immer' import type { Callback, ChatConfig, - ChatItem, Feedback, } from '../types' import { CONVERSATION_ID_INFO } from '../constants' +import { getPrevChatList } from '../utils' import { delConversation, fetchAppInfo, @@ -34,7 +34,6 @@ import type { AppData, ConversationItem, } from '@/models/share' -import { addFileInfos, sortAgentSorts } from '@/app/components/tools/utils' import { useToastContext } from '@/app/components/base/toast' import { changeLanguage } from '@/i18n/i18next-config' import { useAppFavicon } from '@/hooks/use-app-favicon' @@ -108,32 +107,12 @@ export const useChatWithHistory = (installedAppInfo?: InstalledApp) => { const { data: appConversationData, isLoading: appConversationDataLoading, mutate: mutateAppConversationData } = useSWR(['appConversationData', isInstalledApp, appId, false], () => fetchConversations(isInstalledApp, appId, undefined, false, 100)) const { data: appChatListData, isLoading: appChatListDataLoading } = useSWR(chatShouldReloadKey ? 
['appChatList', chatShouldReloadKey, isInstalledApp, appId] : null, () => fetchChatList(chatShouldReloadKey, isInstalledApp, appId)) - const appPrevChatList = useMemo(() => { - const data = appChatListData?.data || [] - const chatList: ChatItem[] = [] - - if (currentConversationId && data.length) { - data.forEach((item: any) => { - chatList.push({ - id: `question-${item.id}`, - content: item.query, - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - chatList.push({ - id: item.id, - content: item.answer, - agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), - feedback: item.feedback, - isAnswer: true, - citation: item.retriever_resources, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - }) - }) - } - - return chatList - }, [appChatListData, currentConversationId]) + const appPrevChatList = useMemo( + () => (currentConversationId && appChatListData?.data.length) + ? getPrevChatList(appChatListData.data) + : [], + [appChatListData, currentConversationId], + ) const [showNewConversationItemInList, setShowNewConversationItemInList] = useState(false) diff --git a/web/app/components/base/chat/chat/answer/index.tsx b/web/app/components/base/chat/chat/answer/index.tsx index 5fe2a7bad5..705cd73ddf 100644 --- a/web/app/components/base/chat/chat/answer/index.tsx +++ b/web/app/components/base/chat/chat/answer/index.tsx @@ -35,6 +35,7 @@ type AnswerProps = { chatAnswerContainerInner?: string hideProcessDetail?: boolean appData?: AppData + noChatInput?: boolean } const Answer: FC = ({ item, @@ -48,6 +49,7 @@ const Answer: FC = ({ chatAnswerContainerInner, hideProcessDetail, appData, + noChatInput, }) => { const { t } = useTranslation() const { @@ -110,6 +112,7 @@ const Answer: FC = ({ question={question} index={index} showPromptLog={showPromptLog} + noChatInput={noChatInput} /> ) } diff --git a/web/app/components/base/chat/chat/answer/operation.tsx b/web/app/components/base/chat/chat/answer/operation.tsx index 08267bb09c..5e5fc3b204 100644 --- a/web/app/components/base/chat/chat/answer/operation.tsx +++ b/web/app/components/base/chat/chat/answer/operation.tsx @@ -7,6 +7,7 @@ import { import { useTranslation } from 'react-i18next' import type { ChatItem } from '../../types' import { useChatContext } from '../context' +import RegenerateBtn from '@/app/components/base/regenerate-btn' import cn from '@/utils/classnames' import CopyBtn from '@/app/components/base/copy-btn' import { MessageFast } from '@/app/components/base/icons/src/vender/solid/communication' @@ -28,6 +29,7 @@ type OperationProps = { maxSize: number contentWidth: number hasWorkflowProcess: boolean + noChatInput?: boolean } const Operation: FC = ({ item, @@ -37,6 +39,7 @@ const Operation: FC = ({ maxSize, contentWidth, hasWorkflowProcess, + noChatInput, }) => { const { t } = useTranslation() const { @@ -45,6 +48,7 @@ const Operation: FC = ({ onAnnotationEdited, onAnnotationRemoved, onFeedback, + onRegenerate, } = useChatContext() const [isShowReplyModal, setIsShowReplyModal] = useState(false) const { @@ -159,12 +163,13 @@ const Operation: FC = ({
) } + { + !isOpeningStatement && !noChatInput && onRegenerate?.(item)} /> + } { config?.supportFeedback && !localFeedback?.rating && onFeedback && !isOpeningStatement && ( -
- +
+
handleFeedback('like')} diff --git a/web/app/components/base/chat/chat/context.tsx b/web/app/components/base/chat/chat/context.tsx index ba6f67189e..c47b750176 100644 --- a/web/app/components/base/chat/chat/context.tsx +++ b/web/app/components/base/chat/chat/context.tsx @@ -12,6 +12,7 @@ export type ChatContextValue = Pick void noChatInput?: boolean onSend?: OnSend + onRegenerate?: OnRegenerate chatContainerClassName?: string chatContainerInnerClassName?: string chatFooterClassName?: string @@ -67,6 +69,7 @@ const Chat: FC = ({ appData, config, onSend, + onRegenerate, chatList, isResponding, noStopResponding, @@ -186,6 +189,7 @@ const Chat: FC = ({ answerIcon={answerIcon} allToolIcons={allToolIcons} onSend={onSend} + onRegenerate={onRegenerate} onAnnotationAdded={onAnnotationAdded} onAnnotationEdited={onAnnotationEdited} onAnnotationRemoved={onAnnotationRemoved} @@ -219,6 +223,7 @@ const Chat: FC = ({ showPromptLog={showPromptLog} chatAnswerContainerInner={chatAnswerContainerInner} hideProcessDetail={hideProcessDetail} + noChatInput={noChatInput} /> ) } diff --git a/web/app/components/base/chat/chat/type.ts b/web/app/components/base/chat/chat/type.ts index b2cb18011c..dd26a4179d 100644 --- a/web/app/components/base/chat/chat/type.ts +++ b/web/app/components/base/chat/chat/type.ts @@ -95,6 +95,7 @@ export type IChatItem = { // for agent log conversationId?: string input?: any + parentMessageId?: string } export type Metadata = { diff --git a/web/app/components/base/chat/constants.ts b/web/app/components/base/chat/constants.ts index 8249be7375..309f0f04a7 100644 --- a/web/app/components/base/chat/constants.ts +++ b/web/app/components/base/chat/constants.ts @@ -1 +1,2 @@ export const CONVERSATION_ID_INFO = 'conversationIdInfo' +export const UUID_NIL = '00000000-0000-0000-0000-000000000000' diff --git a/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx b/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx index 48ee411058..8cb546fd52 100644 --- a/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx +++ b/web/app/components/base/chat/embedded-chatbot/chat-wrapper.tsx @@ -2,6 +2,7 @@ import { useCallback, useEffect, useMemo } from 'react' import Chat from '../chat' import type { ChatConfig, + ChatItem, OnSend, } from '../types' import { useChat } from '../chat/hooks' @@ -45,11 +46,13 @@ const ChatWrapper = () => { } as ChatConfig }, [appParams, currentConversationItem?.introduction, currentConversationId]) const { + chatListRef, chatList, handleSend, handleStop, isResponding, suggestedQuestions, + handleUpdateChatList, } = useChat( appConfig, { @@ -65,11 +68,12 @@ const ChatWrapper = () => { currentChatInstanceRef.current.handleStop = handleStop }, []) - const doSend: OnSend = useCallback((message, files) => { + const doSend: OnSend = useCallback((message, files, last_answer) => { const data: any = { query: message, inputs: currentConversationId ? 
currentConversationItem?.inputs : newConversationInputs, conversation_id: currentConversationId, + parent_message_id: last_answer?.id || chatListRef.current.at(-1)?.id || null, } if (appConfig?.file_upload?.image.enabled && files?.length) @@ -85,6 +89,7 @@ const ChatWrapper = () => { }, ) }, [ + chatListRef, appConfig, currentConversationId, currentConversationItem, @@ -94,6 +99,23 @@ const ChatWrapper = () => { isInstalledApp, appId, ]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) + const chatNode = useMemo(() => { if (inputsForms.length) { return ( @@ -136,6 +158,7 @@ const ChatWrapper = () => { chatFooterClassName='pb-4' chatFooterInnerClassName={cn('mx-auto w-full max-w-full tablet:px-4', isMobile && 'px-4')} onSend={doSend} + onRegenerate={doRegenerate} onStopResponding={handleStop} chatNode={chatNode} allToolIcons={appMeta?.tool_icons || {}} diff --git a/web/app/components/base/chat/embedded-chatbot/hooks.tsx b/web/app/components/base/chat/embedded-chatbot/hooks.tsx index 39d25f57d1..fd89efcbff 100644 --- a/web/app/components/base/chat/embedded-chatbot/hooks.tsx +++ b/web/app/components/base/chat/embedded-chatbot/hooks.tsx @@ -11,10 +11,10 @@ import { useLocalStorageState } from 'ahooks' import produce from 'immer' import type { ChatConfig, - ChatItem, Feedback, } from '../types' import { CONVERSATION_ID_INFO } from '../constants' +import { getPrevChatList, getProcessedInputsFromUrlParams } from '../utils' import { fetchAppInfo, fetchAppMeta, @@ -28,10 +28,8 @@ import type { // AppData, ConversationItem, } from '@/models/share' -import { addFileInfos, sortAgentSorts } from '@/app/components/tools/utils' import { useToastContext } from '@/app/components/base/toast' import { changeLanguage } from '@/i18n/i18next-config' -import { getProcessedInputsFromUrlParams } from '@/app/components/base/chat/utils' export const useEmbeddedChatbot = () => { const isInstalledApp = false @@ -75,32 +73,12 @@ export const useEmbeddedChatbot = () => { const { data: appConversationData, isLoading: appConversationDataLoading, mutate: mutateAppConversationData } = useSWR(['appConversationData', isInstalledApp, appId, false], () => fetchConversations(isInstalledApp, appId, undefined, false, 100)) const { data: appChatListData, isLoading: appChatListDataLoading } = useSWR(chatShouldReloadKey ? ['appChatList', chatShouldReloadKey, isInstalledApp, appId] : null, () => fetchChatList(chatShouldReloadKey, isInstalledApp, appId)) - const appPrevChatList = useMemo(() => { - const data = appChatListData?.data || [] - const chatList: ChatItem[] = [] - - if (currentConversationId && data.length) { - data.forEach((item: any) => { - chatList.push({ - id: `question-${item.id}`, - content: item.query, - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - chatList.push({ - id: item.id, - content: item.answer, - agent_thoughts: addFileInfos(item.agent_thoughts ? 
sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), - feedback: item.feedback, - isAnswer: true, - citation: item.retriever_resources, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - }) - }) - } - - return chatList - }, [appChatListData, currentConversationId]) + const appPrevChatList = useMemo( + () => (currentConversationId && appChatListData?.data.length) + ? getPrevChatList(appChatListData.data) + : [], + [appChatListData, currentConversationId], + ) const [showNewConversationItemInList, setShowNewConversationItemInList] = useState(false) @@ -155,7 +133,7 @@ export const useEmbeddedChatbot = () => { type: 'text-input', } }) - }, [appParams]) + }, [initInputs, appParams]) useEffect(() => { // init inputs from url params diff --git a/web/app/components/base/chat/types.ts b/web/app/components/base/chat/types.ts index 21277fec57..489dbb44cf 100644 --- a/web/app/components/base/chat/types.ts +++ b/web/app/components/base/chat/types.ts @@ -63,7 +63,9 @@ export type ChatItem = IChatItem & { conversationId?: string } -export type OnSend = (message: string, files?: VisionFile[]) => void +export type OnSend = (message: string, files?: VisionFile[], last_answer?: ChatItem) => void + +export type OnRegenerate = (chatItem: ChatItem) => void export type Callback = { onSuccess: () => void diff --git a/web/app/components/base/chat/utils.ts b/web/app/components/base/chat/utils.ts index 3fe5050cc7..e851c4c463 100644 --- a/web/app/components/base/chat/utils.ts +++ b/web/app/components/base/chat/utils.ts @@ -1,7 +1,11 @@ +import { addFileInfos, sortAgentSorts } from '../../tools/utils' +import { UUID_NIL } from './constants' +import type { ChatItem } from './types' + async function decodeBase64AndDecompress(base64String: string) { const binaryString = atob(base64String) const compressedUint8Array = Uint8Array.from(binaryString, char => char.charCodeAt(0)) - const decompressedStream = new Response(compressedUint8Array).body.pipeThrough(new DecompressionStream('gzip')) + const decompressedStream = new Response(compressedUint8Array).body?.pipeThrough(new DecompressionStream('gzip')) const decompressedArrayBuffer = await new Response(decompressedStream).arrayBuffer() return new TextDecoder().decode(decompressedArrayBuffer) } @@ -15,6 +19,57 @@ function getProcessedInputsFromUrlParams(): Record { return inputs } +function appendQAToChatList(chatList: ChatItem[], item: any) { + // we append answer first and then question since will reverse the whole chatList later + chatList.push({ + id: item.id, + content: item.answer, + agent_thoughts: addFileInfos(item.agent_thoughts ? sortAgentSorts(item.agent_thoughts) : item.agent_thoughts, item.message_files), + feedback: item.feedback, + isAnswer: true, + citation: item.retriever_resources, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + }) + chatList.push({ + id: `question-${item.id}`, + content: item.query, + isAnswer: false, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], + }) +} + +/** + * Computes the latest thread messages from all messages of the conversation. + * Same logic as backend codebase `api/core/prompt/utils/extract_thread_messages.py` + * + * @param fetchedMessages - The history chat list data from the backend, sorted by created_at in descending order. This includes all flattened history messages of the conversation. 
+ * @returns An array of ChatItems representing the latest thread. + */ +function getPrevChatList(fetchedMessages: any[]) { + const ret: ChatItem[] = [] + let nextMessageId = null + + for (const item of fetchedMessages) { + if (!item.parent_message_id) { + appendQAToChatList(ret, item) + break + } + + if (!nextMessageId) { + appendQAToChatList(ret, item) + nextMessageId = item.parent_message_id + } + else { + if (item.id === nextMessageId || nextMessageId === UUID_NIL) { + appendQAToChatList(ret, item) + nextMessageId = item.parent_message_id + } + } + } + return ret.reverse() +} + export { getProcessedInputsFromUrlParams, + getPrevChatList, } diff --git a/web/app/components/base/icons/assets/vender/line/general/refresh.svg b/web/app/components/base/icons/assets/vender/line/general/refresh.svg new file mode 100644 index 0000000000..05cf986827 --- /dev/null +++ b/web/app/components/base/icons/assets/vender/line/general/refresh.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/web/app/components/base/icons/src/vender/line/general/Refresh.json b/web/app/components/base/icons/src/vender/line/general/Refresh.json new file mode 100644 index 0000000000..128dcb7d4d --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/general/Refresh.json @@ -0,0 +1,23 @@ +{ + "icon": { + "type": "element", + "isRootNode": true, + "name": "svg", + "attributes": { + "xmlns": "http://www.w3.org/2000/svg", + "viewBox": "0 0 24 24", + "fill": "currentColor" + }, + "children": [ + { + "type": "element", + "name": "path", + "attributes": { + "d": "M5.46257 4.43262C7.21556 2.91688 9.5007 2 12 2C17.5228 2 22 6.47715 22 12C22 14.1361 21.3302 16.1158 20.1892 17.7406L17 12H20C20 7.58172 16.4183 4 12 4C9.84982 4 7.89777 4.84827 6.46023 6.22842L5.46257 4.43262ZM18.5374 19.5674C16.7844 21.0831 14.4993 22 12 22C6.47715 22 2 17.5228 2 12C2 9.86386 2.66979 7.88416 3.8108 6.25944L7 12H4C4 16.4183 7.58172 20 12 20C14.1502 20 16.1022 19.1517 17.5398 17.7716L18.5374 19.5674Z" + }, + "children": [] + } + ] + }, + "name": "Refresh" +} \ No newline at end of file diff --git a/web/app/components/base/icons/src/vender/line/general/Refresh.tsx b/web/app/components/base/icons/src/vender/line/general/Refresh.tsx new file mode 100644 index 0000000000..96641f1c42 --- /dev/null +++ b/web/app/components/base/icons/src/vender/line/general/Refresh.tsx @@ -0,0 +1,16 @@ +// GENERATE BY script +// DON NOT EDIT IT MANUALLY + +import * as React from 'react' +import data from './Refresh.json' +import IconBase from '@/app/components/base/icons/IconBase' +import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase' + +const Icon = React.forwardRef, Omit>(( + props, + ref, +) => ) + +Icon.displayName = 'Refresh' + +export default Icon diff --git a/web/app/components/base/icons/src/vender/line/general/index.ts b/web/app/components/base/icons/src/vender/line/general/index.ts index c1af2e4994..b5c7a7bbc1 100644 --- a/web/app/components/base/icons/src/vender/line/general/index.ts +++ b/web/app/components/base/icons/src/vender/line/general/index.ts @@ -18,6 +18,7 @@ export { default as Menu01 } from './Menu01' export { default as Pin01 } from './Pin01' export { default as Pin02 } from './Pin02' export { default as Plus02 } from './Plus02' +export { default as Refresh } from './Refresh' export { default as Settings01 } from './Settings01' export { default as Settings04 } from './Settings04' export { default as Target04 } from './Target04' diff --git a/web/app/components/base/regenerate-btn/index.tsx 
b/web/app/components/base/regenerate-btn/index.tsx new file mode 100644 index 0000000000..aaf0206df6 --- /dev/null +++ b/web/app/components/base/regenerate-btn/index.tsx @@ -0,0 +1,31 @@ +'use client' +import { t } from 'i18next' +import { Refresh } from '../icons/src/vender/line/general' +import Tooltip from '@/app/components/base/tooltip' + +type Props = { + className?: string + onClick?: () => void +} + +const RegenerateBtn = ({ className, onClick }: Props) => { + return ( +
+ +
onClick?.()} + style={{ + boxShadow: '0px 4px 8px -2px rgba(16, 24, 40, 0.1), 0px 2px 4px -2px rgba(16, 24, 40, 0.06)', + }} + > + +
+
+
+ ) +} + +export default RegenerateBtn diff --git a/web/app/components/workflow/panel/chat-record/index.tsx b/web/app/components/workflow/panel/chat-record/index.tsx index afd20b7358..1bcfd6474d 100644 --- a/web/app/components/workflow/panel/chat-record/index.tsx +++ b/web/app/components/workflow/panel/chat-record/index.tsx @@ -2,7 +2,6 @@ import { memo, useCallback, useEffect, - useMemo, useState, } from 'react' import { RiCloseLine } from '@remixicon/react' @@ -17,50 +16,70 @@ import type { ChatItem } from '@/app/components/base/chat/types' import { fetchConversationMessages } from '@/service/debug' import { useStore as useAppStore } from '@/app/components/app/store' import Loading from '@/app/components/base/loading' +import { UUID_NIL } from '@/app/components/base/chat/constants' + +function appendQAToChatList(newChatList: ChatItem[], item: any) { + newChatList.push({ + id: item.id, + content: item.answer, + feedback: item.feedback, + isAnswer: true, + citation: item.metadata?.retriever_resources, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], + workflow_run_id: item.workflow_run_id, + }) + newChatList.push({ + id: `question-${item.id}`, + content: item.query, + isAnswer: false, + message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], + }) +} + +function getFormattedChatList(messages: any[]) { + const newChatList: ChatItem[] = [] + let nextMessageId = null + for (const item of messages) { + if (!item.parent_message_id) { + appendQAToChatList(newChatList, item) + break + } + + if (!nextMessageId) { + appendQAToChatList(newChatList, item) + nextMessageId = item.parent_message_id + } + else { + if (item.id === nextMessageId || nextMessageId === UUID_NIL) { + appendQAToChatList(newChatList, item) + nextMessageId = item.parent_message_id + } + } + } + return newChatList.reverse() +} const ChatRecord = () => { const [fetched, setFetched] = useState(false) - const [chatList, setChatList] = useState([]) + const [chatList, setChatList] = useState([]) const appDetail = useAppStore(s => s.appDetail) const workflowStore = useWorkflowStore() const { handleLoadBackupDraft } = useWorkflowRun() const historyWorkflowData = useStore(s => s.historyWorkflowData) const currentConversationID = historyWorkflowData?.conversation_id - const chatMessageList = useMemo(() => { - const res: ChatItem[] = [] - if (chatList.length) { - chatList.forEach((item: any) => { - res.push({ - id: `question-${item.id}`, - content: item.query, - isAnswer: false, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'user') || [], - }) - res.push({ - id: item.id, - content: item.answer, - feedback: item.feedback, - isAnswer: true, - citation: item.metadata?.retriever_resources, - message_files: item.message_files?.filter((file: any) => file.belongs_to === 'assistant') || [], - workflow_run_id: item.workflow_run_id, - }) - }) - } - return res - }, [chatList]) - const handleFetchConversationMessages = useCallback(async () => { if (appDetail && currentConversationID) { try { setFetched(false) const res = await fetchConversationMessages(appDetail.id, currentConversationID) - setFetched(true) - setChatList((res as any).data) + setChatList(getFormattedChatList((res as any).data)) } catch (e) { - + console.error(e) + } + finally { + setFetched(true) } } }, [appDetail, currentConversationID]) @@ -101,7 +120,7 @@ const ChatRecord = () => { config={{ supportCitationHitInfo: true, } as any} - chatList={chatMessageList} + 
chatList={chatList} chatContainerClassName='px-4' chatContainerInnerClassName='pt-6 w-full max-w-full mx-auto' chatFooterClassName='px-4 rounded-b-2xl' diff --git a/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx b/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx index a7dd607e22..107a5dc698 100644 --- a/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx +++ b/web/app/components/workflow/panel/debug-and-preview/chat-wrapper.tsx @@ -18,7 +18,7 @@ import ConversationVariableModal from './conversation-variable-modal' import { useChat } from './hooks' import type { ChatWrapperRefType } from './index' import Chat from '@/app/components/base/chat/chat' -import type { OnSend } from '@/app/components/base/chat/types' +import type { ChatItem, OnSend } from '@/app/components/base/chat/types' import { useFeaturesStore } from '@/app/components/base/features/hooks' import { fetchSuggestedQuestions, @@ -58,6 +58,8 @@ const ChatWrapper = forwardRef(({ showConv const { conversationId, chatList, + chatListRef, + handleUpdateChatList, handleStop, isResponding, suggestedQuestions, @@ -73,19 +75,36 @@ const ChatWrapper = forwardRef(({ showConv taskId => stopChatMessageResponding(appDetail!.id, taskId), ) - const doSend = useCallback((query, files) => { + const doSend = useCallback((query, files, last_answer) => { handleSend( { query, files, inputs: workflowStore.getState().inputs, conversation_id: conversationId, + parent_message_id: last_answer?.id || chatListRef.current.at(-1)?.id || null, }, { onGetSuggestedQuestions: (messageId, getAbortController) => fetchSuggestedQuestions(appDetail!.id, messageId, getAbortController), }, ) - }, [conversationId, handleSend, workflowStore, appDetail]) + }, [chatListRef, conversationId, handleSend, workflowStore, appDetail]) + + const doRegenerate = useCallback((chatItem: ChatItem) => { + const index = chatList.findIndex(item => item.id === chatItem.id) + if (index === -1) + return + + const prevMessages = chatList.slice(0, index) + const question = prevMessages.pop() + const lastAnswer = prevMessages.at(-1) + + if (!question) + return + + handleUpdateChatList(prevMessages) + doSend(question.content, question.message_files, (!lastAnswer || lastAnswer.isOpeningStatement) ? 
undefined : lastAnswer) + }, [chatList, handleUpdateChatList, doSend]) useImperativeHandle(ref, () => { return { @@ -107,6 +126,7 @@ const ChatWrapper = forwardRef(({ showConv chatFooterClassName='px-4 rounded-bl-2xl' chatFooterInnerClassName='pb-4 w-full max-w-full mx-auto' onSend={doSend} + onRegenerate={doRegenerate} onStopResponding={handleStop} chatNode={( <> diff --git a/web/app/components/workflow/panel/debug-and-preview/hooks.ts b/web/app/components/workflow/panel/debug-and-preview/hooks.ts index 51a018bcb1..cad76a4490 100644 --- a/web/app/components/workflow/panel/debug-and-preview/hooks.ts +++ b/web/app/components/workflow/panel/debug-and-preview/hooks.ts @@ -387,6 +387,8 @@ export const useChat = ( return { conversationId: conversationId.current, chatList, + chatListRef, + handleUpdateChatList, handleSend, handleStop, handleRestart, diff --git a/web/hooks/use-app-favicon.ts b/web/hooks/use-app-favicon.ts index 86eadc1b3d..1ff743928f 100644 --- a/web/hooks/use-app-favicon.ts +++ b/web/hooks/use-app-favicon.ts @@ -5,10 +5,10 @@ import type { AppIconType } from '@/types/app' type UseAppFaviconOptions = { enable?: boolean - icon_type?: AppIconType + icon_type?: AppIconType | null icon?: string - icon_background?: string - icon_url?: string + icon_background?: string | null + icon_url?: string | null } export function useAppFavicon(options: UseAppFaviconOptions) { diff --git a/web/i18n/en-US/app-api.ts b/web/i18n/en-US/app-api.ts index 631faeee9a..355ff30602 100644 --- a/web/i18n/en-US/app-api.ts +++ b/web/i18n/en-US/app-api.ts @@ -6,6 +6,7 @@ const translation = { ok: 'In Service', copy: 'Copy', copied: 'Copied', + regenerate: 'Regenerate', play: 'Play', pause: 'Pause', playing: 'Playing', diff --git a/web/i18n/zh-Hans/app-api.ts b/web/i18n/zh-Hans/app-api.ts index 6b9048b66e..a0defdab62 100644 --- a/web/i18n/zh-Hans/app-api.ts +++ b/web/i18n/zh-Hans/app-api.ts @@ -6,6 +6,7 @@ const translation = { ok: '运行中', copy: '复制', copied: '已复制', + regenerate: '重新生成', play: '播放', pause: '暂停', playing: '播放中', diff --git a/web/models/log.ts b/web/models/log.ts index 8da1c4cf4e..dc557bfe21 100644 --- a/web/models/log.ts +++ b/web/models/log.ts @@ -106,6 +106,7 @@ export type MessageContent = { metadata: Metadata agent_thoughts: any[] // TODO workflow_run_id: string + parent_message_id: string | null } export type CompletionConversationGeneralDetail = { From a587f0d3f14cde1c36ae5206c8eac5b87ec84738 Mon Sep 17 00:00:00 2001 From: Shota Totsuka <153569547+totsukash@users.noreply.github.com> Date: Sun, 22 Sep 2024 10:04:00 +0900 Subject: [PATCH 25/40] docs: Add Japanese documentation for tools (#8469) --- api/core/tools/README.md | 4 +- api/core/tools/README_CN.md | 4 +- api/core/tools/README_JP.md | 31 ++ api/core/tools/docs/en_US/tool_scale_out.md | 2 +- .../{zh_Hans => }/images/index/image-1.png | Bin .../{zh_Hans => }/images/index/image-2.png | Bin .../docs/{zh_Hans => }/images/index/image.png | Bin .../tools/docs/ja_JP/advanced_scale_out.md | 283 ++++++++++++++++++ api/core/tools/docs/ja_JP/tool_scale_out.md | 240 +++++++++++++++ api/core/tools/docs/zh_Hans/tool_scale_out.md | 2 +- 10 files changed, 560 insertions(+), 6 deletions(-) create mode 100644 api/core/tools/README_JP.md rename api/core/tools/docs/{zh_Hans => }/images/index/image-1.png (100%) rename api/core/tools/docs/{zh_Hans => }/images/index/image-2.png (100%) rename api/core/tools/docs/{zh_Hans => }/images/index/image.png (100%) create mode 100644 api/core/tools/docs/ja_JP/advanced_scale_out.md create mode 100644 
api/core/tools/docs/ja_JP/tool_scale_out.md diff --git a/api/core/tools/README.md b/api/core/tools/README.md index c7ee81422e..b5d0a30d34 100644 --- a/api/core/tools/README.md +++ b/api/core/tools/README.md @@ -9,10 +9,10 @@ The tools provided for Agents and Workflows are currently divided into two categ - `Api-Based Tools` leverage third-party APIs for implementation. You don't need to code to integrate these -- simply provide interface definitions in formats like `OpenAPI` , `Swagger`, or the `OpenAI-plugin` on the front-end. ### Built-in Tool Providers -![Alt text](docs/zh_Hans/images/index/image.png) +![Alt text](docs/images/index/image.png) ### API Tool Providers -![Alt text](docs/zh_Hans/images/index/image-1.png) +![Alt text](docs/images/index/image-1.png) ## Tool Integration diff --git a/api/core/tools/README_CN.md b/api/core/tools/README_CN.md index fda5d0630c..7e18441131 100644 --- a/api/core/tools/README_CN.md +++ b/api/core/tools/README_CN.md @@ -12,10 +12,10 @@ - `Api-Based Tools` 基于API的工具,即通过调用第三方API实现的工具,`Api-Based Tool`不需要再额外定义,只需提供`OpenAPI` `Swagger` `OpenAI plugin`等接口文档即可。 ### 内置工具供应商 -![Alt text](docs/zh_Hans/images/index/image.png) +![Alt text](docs/images/index/image.png) ### API工具供应商 -![Alt text](docs/zh_Hans/images/index/image-1.png) +![Alt text](docs/images/index/image-1.png) ## 工具接入 为了实现更灵活更强大的功能,Tools提供了一系列的接口,帮助开发者快速构建想要的工具,本文作为开发者的入门指南,将会以[快速接入](./docs/zh_Hans/tool_scale_out.md)和[高级接入](./docs/zh_Hans/advanced_scale_out.md)两部分介绍如何接入工具。 diff --git a/api/core/tools/README_JP.md b/api/core/tools/README_JP.md new file mode 100644 index 0000000000..39d0bf1762 --- /dev/null +++ b/api/core/tools/README_JP.md @@ -0,0 +1,31 @@ +# Tools + +このモジュールは、Difyのエージェントアシスタントやワークフローで使用される組み込みツールを実装しています。このモジュールでは、フロントエンドのロジックを変更することなく、独自のツールを定義し表示することができます。この分離により、Difyの機能を容易に水平方向にスケールアウトできます。 + +## 機能紹介 + +エージェントとワークフロー向けに提供されるツールは、現在2つのカテゴリーに分類されています。 + +- `Built-in Tools`はDify内部で実装され、エージェントとワークフローで使用するためにハードコードされています。 +- `Api-Based Tools`はサードパーティのAPIを利用して実装されています。これらを統合するためのコーディングは不要で、フロントエンドで + `OpenAPI`, `Swagger`または`OpenAI-plugin`などの形式でインターフェース定義を提供するだけです。 + +### 組み込みツールプロバイダー + +![Alt text](docs/images/index/image.png) + +### APIツールプロバイダー + +![Alt text](docs/images/index/image-1.png) + +## ツールの統合 + +開発者が柔軟で強力なツールを構築できるよう、2つのガイドを提供しています。 + +### [クイック統合 👈🏻](./docs/ja_JP/tool_scale_out.md) + +クイック統合は、Google検索ツールの例を通じて、ツール統合の基本をすばやく理解できるようにすることを目的としています。 + +### [高度な統合 👈🏻](./docs/ja_JP/advanced_scale_out.md) + +高度な統合では、モジュールインターフェースについてより深く掘り下げ、画像生成、複数ツールの組み合わせ、異なるツール間でのパラメーター、画像、ファイルのフロー管理など、より複雑な機能の実装方法を説明します。 \ No newline at end of file diff --git a/api/core/tools/docs/en_US/tool_scale_out.md b/api/core/tools/docs/en_US/tool_scale_out.md index 121b7a5a76..1deaf04a47 100644 --- a/api/core/tools/docs/en_US/tool_scale_out.md +++ b/api/core/tools/docs/en_US/tool_scale_out.md @@ -245,4 +245,4 @@ After the above steps are completed, we can see this tool on the frontend, and i Of course, because google_search needs a credential, before using it, you also need to input your credentials on the frontend. 
-![Alt text](../zh_Hans/images/index/image-2.png) +![Alt text](../images/index/image-2.png) diff --git a/api/core/tools/docs/zh_Hans/images/index/image-1.png b/api/core/tools/docs/images/index/image-1.png similarity index 100% rename from api/core/tools/docs/zh_Hans/images/index/image-1.png rename to api/core/tools/docs/images/index/image-1.png diff --git a/api/core/tools/docs/zh_Hans/images/index/image-2.png b/api/core/tools/docs/images/index/image-2.png similarity index 100% rename from api/core/tools/docs/zh_Hans/images/index/image-2.png rename to api/core/tools/docs/images/index/image-2.png diff --git a/api/core/tools/docs/zh_Hans/images/index/image.png b/api/core/tools/docs/images/index/image.png similarity index 100% rename from api/core/tools/docs/zh_Hans/images/index/image.png rename to api/core/tools/docs/images/index/image.png diff --git a/api/core/tools/docs/ja_JP/advanced_scale_out.md b/api/core/tools/docs/ja_JP/advanced_scale_out.md new file mode 100644 index 0000000000..96f843354f --- /dev/null +++ b/api/core/tools/docs/ja_JP/advanced_scale_out.md @@ -0,0 +1,283 @@ +# 高度なツール統合 + +このガイドを始める前に、Difyのツール統合プロセスの基本を理解していることを確認してください。簡単な概要については[クイック統合](./tool_scale_out.md)をご覧ください。 + +## ツールインターフェース + +より複雑なツールを迅速に構築するのを支援するため、`Tool`クラスに一連のヘルパーメソッドを定義しています。 + +### メッセージの返却 + +Difyは`テキスト`、`リンク`、`画像`、`ファイルBLOB`、`JSON`などの様々なメッセージタイプをサポートしています。以下のインターフェースを通じて、異なるタイプのメッセージをLLMとユーザーに返すことができます。 + +注意:以下のインターフェースの一部のパラメータについては、後のセクションで説明します。 + +#### 画像URL +画像のURLを渡すだけで、Difyが自動的に画像をダウンロードしてユーザーに返します。 + +```python + def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessage: + """ + create an image message + + :param image: the url of the image + :param save_as: save as + :return: the image message + """ +``` + +#### リンク +リンクを返す必要がある場合は、以下のインターフェースを使用できます。 + +```python + def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage: + """ + create a link message + + :param link: the url of the link + :param save_as: save as + :return: the link message + """ +``` + +#### テキスト +テキストメッセージを返す必要がある場合は、以下のインターフェースを使用できます。 + +```python + def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage: + """ + create a text message + + :param text: the text of the message + :param save_as: save as + :return: the text message + """ +``` + +#### ファイルBLOB +画像、音声、動画、PPT、Word、Excelなどのファイルの生データを返す必要がある場合は、以下のインターフェースを使用できます。 + +- `blob` ファイルの生データ(bytes型) +- `meta` ファイルのメタデータ。ファイルの種類が分かっている場合は、`mime_type`を渡すことをお勧めします。そうでない場合、Difyはデフォルトタイプとして`octet/stream`を使用します。 + +```python + def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage: + """ + create a blob message + + :param blob: the blob + :param meta: meta + :param save_as: save as + :return: the blob message + """ +``` + +#### JSON +フォーマットされたJSONを返す必要がある場合は、以下のインターフェースを使用できます。これは通常、ワークフロー内のノード間のデータ伝送に使用されますが、エージェントモードでは、ほとんどの大規模言語モデルもJSONを読み取り、理解することができます。 + +- `object` Pythonの辞書オブジェクトで、自動的にJSONにシリアライズされます。 + +```python + def create_json_message(self, object: dict) -> ToolInvokeMessage: + """ + create a json message + """ +``` + +### ショートカットツール + +大規模モデルアプリケーションでは、以下の2つの一般的なニーズがあります: +- まず長いテキストを事前に要約し、その要約内容をLLMに渡すことで、元のテキストが長すぎてLLMが処理できない問題を防ぐ +- ツールが取得したコンテンツがリンクである場合、Webページ情報をクロールしてからLLMに返す必要がある + +開発者がこれら2つのニーズを迅速に実装できるよう、以下の2つのショートカットツールを提供しています。 + +#### テキスト要約ツール + +このツールはuser_idと要約するテキストを入力として受け取り、要約されたテキストを返します。Difyは現在のワークスペースのデフォルトモデルを使用して長文を要約します。 + +```python + def summary(self, user_id: str, content: str) -> str: + """ + summary the 
content + + :param user_id: the user id + :param content: the content + :return: the summary + """ +``` + +#### Webページクローリングツール + +このツールはクロールするWebページのリンクとユーザーエージェント(空でも可)を入力として受け取り、そのWebページの情報を含む文字列を返します。`user_agent`はオプションのパラメータで、ツールを識別するために使用できます。渡さない場合、Difyはデフォルトの`user_agent`を使用します。 + +```python + def get_url(self, url: str, user_agent: str = None) -> str: + """ + get url from the crawled result + """ +``` + +### 変数プール + +`Tool`内に変数プールを導入し、ツールの実行中に生成された変数やファイルなどを保存します。これらの変数は、ツールの実行中に他のツールが使用することができます。 + +次に、`DallE3`と`Vectorizer.AI`を例に、変数プールの使用方法を紹介します。 + +- `DallE3`は画像生成ツールで、テキストに基づいて画像を生成できます。ここでは、`DallE3`にカフェのロゴを生成させます。 +- `Vectorizer.AI`はベクター画像変換ツールで、画像をベクター画像に変換できるため、画像を無限に拡大しても品質が損なわれません。ここでは、`DallE3`が生成したPNGアイコンをベクター画像に変換し、デザイナーが実際に使用できるようにします。 + +#### DallE3 +まず、DallE3を使用します。画像を作成した後、その画像を変数プールに保存します。コードは以下の通りです: + +```python +from typing import Any, Dict, List, Union +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + +from base64 import b64decode + +from openai import OpenAI + +class DallE3Tool(BuiltinTool): + def _invoke(self, + user_id: str, + tool_parameters: Dict[str, Any], + ) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + invoke tools + """ + client = OpenAI( + api_key=self.runtime.credentials['openai_api_key'], + ) + + # prompt + prompt = tool_parameters.get('prompt', '') + if not prompt: + return self.create_text_message('Please input prompt') + + # call openapi dalle3 + response = client.images.generate( + prompt=prompt, model='dall-e-3', + size='1024x1024', n=1, style='vivid', quality='standard', + response_format='b64_json' + ) + + result = [] + for image in response.data: + # Save all images to the variable pool through the save_as parameter. The variable name is self.VARIABLE_KEY.IMAGE.value. If new images are generated later, they will overwrite the previous images. 
+ result.append(self.create_blob_message(blob=b64decode(image.b64_json), + meta={ 'mime_type': 'image/png' }, + save_as=self.VARIABLE_KEY.IMAGE.value)) + + return result +``` + +ここでは画像の変数名として`self.VARIABLE_KEY.IMAGE.value`を使用していることに注意してください。開発者のツールが互いに連携できるよう、この`KEY`を定義しました。自由に使用することも、この`KEY`を使用しないこともできます。カスタムのKEYを渡すこともできます。 + +#### Vectorizer.AI +次に、Vectorizer.AIを使用して、DallE3が生成したPNGアイコンをベクター画像に変換します。ここで定義した関数を見てみましょう。コードは以下の通りです: + +```python +from core.tools.tool.builtin_tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter +from core.tools.errors import ToolProviderCredentialValidationError + +from typing import Any, Dict, List, Union +from httpx import post +from base64 import b64decode + +class VectorizerTool(BuiltinTool): + def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) + -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + Tool invocation, the image variable name needs to be passed in from here, so that we can get the image from the variable pool + """ + + + def get_runtime_parameters(self) -> List[ToolParameter]: + """ + Override the tool parameter list, we can dynamically generate the parameter list based on the actual situation in the current variable pool, so that the LLM can generate the form based on the parameter list + """ + + + def is_tool_available(self) -> bool: + """ + Whether the current tool is available, if there is no image in the current variable pool, then we don't need to display this tool, just return False here + """ +``` + +次に、これら3つの関数を実装します: + +```python +from core.tools.tool.builtin_tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter +from core.tools.errors import ToolProviderCredentialValidationError + +from typing import Any, Dict, List, Union +from httpx import post +from base64 import b64decode + +class VectorizerTool(BuiltinTool): + def _invoke(self, user_id: str, tool_parameters: Dict[str, Any]) + -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + invoke tools + """ + api_key_name = self.runtime.credentials.get('api_key_name', None) + api_key_value = self.runtime.credentials.get('api_key_value', None) + + if not api_key_name or not api_key_value: + raise ToolProviderCredentialValidationError('Please input api key name and value') + + # Get image_id, the definition of image_id can be found in get_runtime_parameters + image_id = tool_parameters.get('image_id', '') + if not image_id: + return self.create_text_message('Please input image id') + + # Get the image generated by DallE from the variable pool + image_binary = self.get_variable_file(self.VARIABLE_KEY.IMAGE) + if not image_binary: + return self.create_text_message('Image not found, please request user to generate image firstly.') + + # Generate vector image + response = post( + 'https://vectorizer.ai/api/v1/vectorize', + files={ 'image': image_binary }, + data={ 'mode': 'test' }, + auth=(api_key_name, api_key_value), + timeout=30 + ) + + if response.status_code != 200: + raise Exception(response.text) + + return [ + self.create_text_message('the vectorized svg is saved as an image.'), + self.create_blob_message(blob=response.content, + meta={'mime_type': 'image/svg+xml'}) + ] + + def get_runtime_parameters(self) -> List[ToolParameter]: + """ + override the runtime parameters + """ + # Here, we override the tool parameter list, define the image_id, and set its option list to all images in the current variable pool. 
The configuration here is consistent with the configuration in yaml. + return [ + ToolParameter.get_simple_instance( + name='image_id', + llm_description=f'the image id that you want to vectorize, \ + and the image id should be specified in \ + {[i.name for i in self.list_default_image_variables()]}', + type=ToolParameter.ToolParameterType.SELECT, + required=True, + options=[i.name for i in self.list_default_image_variables()] + ) + ] + + def is_tool_available(self) -> bool: + # Only when there are images in the variable pool, the LLM needs to use this tool + return len(self.list_default_image_variables()) > 0 +``` + +ここで注目すべきは、実際には`image_id`を使用していないことです。このツールを呼び出す際には、デフォルトの変数プールに必ず画像があると仮定し、直接`image_binary = self.get_variable_file(self.VARIABLE_KEY.IMAGE)`を使用して画像を取得しています。モデルの能力が弱い場合、開発者にもこの方法を推奨します。これにより、エラー許容度を効果的に向上させ、モデルが誤ったパラメータを渡すのを防ぐことができます。 \ No newline at end of file diff --git a/api/core/tools/docs/ja_JP/tool_scale_out.md b/api/core/tools/docs/ja_JP/tool_scale_out.md new file mode 100644 index 0000000000..a721023d00 --- /dev/null +++ b/api/core/tools/docs/ja_JP/tool_scale_out.md @@ -0,0 +1,240 @@ +# ツールの迅速な統合 + +ここでは、GoogleSearchを例にツールを迅速に統合する方法を紹介します。 + +## 1. ツールプロバイダーのyamlを準備する + +### 概要 + +このyamlファイルには、プロバイダー名、アイコン、作者などの詳細情報が含まれ、フロントエンドでの柔軟な表示を可能にします。 + +### 例 + +`core/tools/provider/builtin`の下に`google`モジュール(フォルダ)を作成し、`google.yaml`を作成します。名前はモジュール名と一致している必要があります。 + +以降、このツールに関するすべての操作はこのモジュール内で行います。 + +```yaml +identity: # ツールプロバイダーの基本情報 + author: Dify # 作者 + name: google # 名前(一意、他のプロバイダーと重複不可) + label: # フロントエンド表示用のラベル + en_US: Google # 英語ラベル + zh_Hans: Google # 中国語ラベル + description: # フロントエンド表示用の説明 + en_US: Google # 英語説明 + zh_Hans: Google # 中国語説明 + icon: icon.svg # アイコン(現在のモジュールの_assetsフォルダに配置) + tags: # タグ(フロントエンド表示用) + - search +``` + +- `identity`フィールドは必須で、ツールプロバイダーの基本情報(作者、名前、ラベル、説明、アイコンなど)が含まれます。 + - アイコンは現在のモジュールの`_assets`フォルダに配置する必要があります。[こちら](../../provider/builtin/google/_assets/icon.svg)を参照してください。 + - タグはフロントエンドでの表示に使用され、ユーザーがこのツールプロバイダーを素早く見つけるのに役立ちます。現在サポートされているすべてのタグは以下の通りです: + ```python + class ToolLabelEnum(Enum): + SEARCH = 'search' + IMAGE = 'image' + VIDEOS = 'videos' + WEATHER = 'weather' + FINANCE = 'finance' + DESIGN = 'design' + TRAVEL = 'travel' + SOCIAL = 'social' + NEWS = 'news' + MEDICAL = 'medical' + PRODUCTIVITY = 'productivity' + EDUCATION = 'education' + BUSINESS = 'business' + ENTERTAINMENT = 'entertainment' + UTILITIES = 'utilities' + OTHER = 'other' + ``` + +## 2. 
プロバイダーの認証情報を準備する + +GoogleはSerpApiが提供するAPIを使用するサードパーティツールであり、SerpApiを使用するにはAPI Keyが必要です。つまり、このツールを使用するには認証情報が必要です。一方、`wikipedia`のようなツールでは認証情報フィールドを記入する必要はありません。[こちら](../../provider/builtin/wikipedia/wikipedia.yaml)を参照してください。 + +認証情報フィールドを設定すると、以下のようになります: + +```yaml +identity: + author: Dify + name: google + label: + en_US: Google + zh_Hans: Google + description: + en_US: Google + zh_Hans: Google + icon: icon.svg +credentials_for_provider: # 認証情報フィールド + serpapi_api_key: # 認証情報フィールド名 + type: secret-input # 認証情報フィールドタイプ + required: true # 必須かどうか + label: # 認証情報フィールドラベル + en_US: SerpApi API key # 英語ラベル + zh_Hans: SerpApi API key # 中国語ラベル + placeholder: # 認証情報フィールドプレースホルダー + en_US: Please input your SerpApi API key # 英語プレースホルダー + zh_Hans: 请输入你的 SerpApi API key # 中国語プレースホルダー + help: # 認証情報フィールドヘルプテキスト + en_US: Get your SerpApi API key from SerpApi # 英語ヘルプテキスト + zh_Hans: 从 SerpApi 获取您的 SerpApi API key # 中国語ヘルプテキスト + url: https://serpapi.com/manage-api-key # 認証情報フィールドヘルプリンク +``` + +- `type`:認証情報フィールドタイプ。現在、`secret-input`、`text-input`、`select`の3種類をサポートしており、それぞれパスワード入力ボックス、テキスト入力ボックス、ドロップダウンボックスに対応します。`secret-input`の場合、フロントエンドで入力内容が隠され、バックエンドで入力内容が暗号化されます。 + +## 3. ツールのyamlを準備する + +1つのプロバイダーの下に複数のツールを持つことができ、各ツールにはyamlファイルが必要です。このファイルにはツールの基本情報、パラメータ、出力などが含まれます。 + +引き続きGoogleSearchを例に、`google`モジュールの下に`tools`モジュールを作成し、`tools/google_search.yaml`を作成します。内容は以下の通りです: + +```yaml +identity: # ツールの基本情報 + name: google_search # ツール名(一意、他のツールと重複不可) + author: Dify # 作者 + label: # フロントエンド表示用のラベル + en_US: GoogleSearch # 英語ラベル + zh_Hans: 谷歌搜索 # 中国語ラベル +description: # フロントエンド表示用の説明 + human: # フロントエンド表示用の紹介(多言語対応) + en_US: A tool for performing a Google SERP search and extracting snippets and webpages. Input should be a search query. + zh_Hans: 一个用于执行 Google SERP 搜索并提取片段和网页的工具。输入应该是一个搜索查询。 + llm: A tool for performing a Google SERP search and extracting snippets and webpages. Input should be a search query. 
# LLMに渡す紹介文。LLMがこのツールをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。 +parameters: # パラメータリスト + - name: query # パラメータ名 + type: string # パラメータタイプ + required: true # 必須かどうか + label: # パラメータラベル + en_US: Query string # 英語ラベル + zh_Hans: 查询语句 # 中国語ラベル + human_description: # フロントエンド表示用の紹介(多言語対応) + en_US: used for searching + zh_Hans: 用于搜索网页内容 + llm_description: key words for searching # LLMに渡す紹介文。LLMがこのパラメータをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。 + form: llm # フォームタイプ。llmはこのパラメータがAgentによって推論される必要があることを意味し、フロントエンドではこのパラメータは表示されません。 + - name: result_type + type: select # パラメータタイプ + required: true + options: # ドロップダウンボックスのオプション + - value: text + label: + en_US: text + zh_Hans: 文本 + - value: link + label: + en_US: link + zh_Hans: 链接 + default: link + label: + en_US: Result type + zh_Hans: 结果类型 + human_description: + en_US: used for selecting the result type, text or link + zh_Hans: 用于选择结果类型,使用文本还是链接进行展示 + form: form # フォームタイプ。formはこのパラメータが対話開始前にフロントエンドでユーザーによって入力される必要があることを意味します。 +``` + +- `identity`フィールドは必須で、ツールの基本情報(名前、作者、ラベル、説明など)が含まれます。 +- `parameters` パラメータリスト + - `name`(必須)パラメータ名。一意で、他のパラメータと重複しないようにしてください。 + - `type`(必須)パラメータタイプ。現在、`string`、`number`、`boolean`、`select`、`secret-input`の5種類をサポートしており、それぞれ文字列、数値、ブール値、ドロップダウンボックス、暗号化入力ボックスに対応します。機密情報には`secret-input`タイプの使用をお勧めします。 + - `label`(必須)パラメータラベル。フロントエンド表示用です。 + - `form`(必須)フォームタイプ。現在、`llm`と`form`の2種類をサポートしています。 + - エージェントアプリケーションでは、`llm`はこのパラメータがLLM自身によって推論されることを示し、`form`はこのツールを使用するために事前に設定できるパラメータであることを示します。 + - ワークフローアプリケーションでは、`llm`と`form`の両方がフロントエンドで入力する必要がありますが、`llm`のパラメータはツールノードの入力変数として使用されます。 + - `required` パラメータが必須かどうかを示します。 + - `llm`モードでは、パラメータが必須の場合、Agentはこのパラメータを推論する必要があります。 + - `form`モードでは、パラメータが必須の場合、ユーザーは対話開始前にフロントエンドでこのパラメータを入力する必要があります。 + - `options` パラメータオプション + - `llm`モードでは、DifyはすべてのオプションをLLMに渡し、LLMはこれらのオプションに基づいて推論できます。 + - `form`モードで、`type`が`select`の場合、フロントエンドはこれらのオプションを表示します。 + - `default` デフォルト値 + - `min` 最小値。パラメータタイプが`number`の場合に設定できます。 + - `max` 最大値。パラメータタイプが`number`の場合に設定できます。 + - `human_description` フロントエンド表示用の紹介。多言語対応です。 + - `placeholder` 入力ボックスのプロンプトテキスト。フォームタイプが`form`で、パラメータタイプが`string`、`number`、`secret-input`の場合に設定できます。多言語対応です。 + - `llm_description` LLMに渡す紹介文。LLMがこのパラメータをより理解できるよう、できるだけ詳細な情報を記述することをお勧めします。 + +## 4. ツールコードを準備する + +ツールの設定が完了したら、ツールのロジックを実装するコードを作成します。 + +`google/tools`モジュールの下に`google_search.py`を作成し、内容は以下の通りです: + +```python +from core.tools.tool.builtin_tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage + +from typing import Any, Dict, List, Union + +class GoogleSearchTool(BuiltinTool): + def _invoke(self, + user_id: str, + tool_parameters: Dict[str, Any], + ) -> Union[ToolInvokeMessage, List[ToolInvokeMessage]]: + """ + ツールを呼び出す + """ + query = tool_parameters['query'] + result_type = tool_parameters['result_type'] + api_key = self.runtime.credentials['serpapi_api_key'] + result = SerpAPI(api_key).run(query, result_type=result_type) + + if result_type == 'text': + return self.create_text_message(text=result) + return self.create_link_message(link=result) +``` + +### パラメータ +ツールの全体的なロジックは`_invoke`メソッドにあります。このメソッドは2つのパラメータ(`user_id`とtool_parameters`)を受け取り、それぞれユーザーIDとツールパラメータを表します。 + +### 戻り値 +ツールの戻り値として、1つのメッセージまたは複数のメッセージを選択できます。ここでは1つのメッセージを返しています。`create_text_message`と`create_link_message`を使用して、テキストメッセージまたはリンクメッセージを作成できます。複数のメッセージを返す場合は、リストを構築できます(例:`[self.create_text_message('msg1'), self.create_text_message('msg2')]`)。 + +## 5. 
プロバイダーコードを準備する + +最後に、プロバイダーモジュールの下にプロバイダークラスを作成し、プロバイダーの認証情報検証ロジックを実装する必要があります。認証情報の検証が失敗した場合、`ToolProviderCredentialValidationError`例外が発生します。 + +`google`モジュールの下に`google.py`を作成し、内容は以下の通りです: + +```python +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController +from core.tools.errors import ToolProviderCredentialValidationError + +from core.tools.provider.builtin.google.tools.google_search import GoogleSearchTool + +from typing import Any, Dict + +class GoogleProvider(BuiltinToolProviderController): + def _validate_credentials(self, credentials: Dict[str, Any]) -> None: + try: + # 1. ここでGoogleSearchTool()を使ってGoogleSearchToolをインスタンス化する必要があります。これによりGoogleSearchToolのyaml設定が自動的に読み込まれますが、この時点では認証情報は含まれていません + # 2. 次に、fork_tool_runtimeメソッドを使用して、現在の認証情報をGoogleSearchToolに渡す必要があります + # 3. 最後に、invokeを呼び出します。パラメータはGoogleSearchToolのyamlで設定されたパラメータルールに従って渡す必要があります + GoogleSearchTool().fork_tool_runtime( + meta={ + "credentials": credentials, + } + ).invoke( + user_id='', + tool_parameters={ + "query": "test", + "result_type": "link" + }, + ) + except Exception as e: + raise ToolProviderCredentialValidationError(str(e)) +``` + +## 完了 + +以上のステップが完了すると、このツールをフロントエンドで確認し、Agentで使用することができるようになります。 + +もちろん、google_searchには認証情報が必要なため、使用する前にフロントエンドで認証情報を入力する必要があります。 + +![Alt text](../images/index/image-2.png) \ No newline at end of file diff --git a/api/core/tools/docs/zh_Hans/tool_scale_out.md b/api/core/tools/docs/zh_Hans/tool_scale_out.md index 06a8d9a4f9..ec61e4677b 100644 --- a/api/core/tools/docs/zh_Hans/tool_scale_out.md +++ b/api/core/tools/docs/zh_Hans/tool_scale_out.md @@ -234,4 +234,4 @@ class GoogleProvider(BuiltinToolProviderController): 当然,因为google_search需要一个凭据,在使用之前,还需要在前端配置它的凭据。 -![Alt text](images/index/image-2.png) +![Alt text](../images/index/image-2.png) From c8b9bdebfe88e26a13abf36330420b77c3106b16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=86=E8=90=8C=E9=97=B7=E6=B2=B9=E7=93=B6?= <253605712@qq.com> Date: Sun, 22 Sep 2024 10:08:35 +0800 Subject: [PATCH 26/40] feat:use xinference tts stream mode (#8616) --- .../model_providers/xinference/llm/llm.py | 3 +-- .../model_providers/xinference/tts/tts.py | 12 ++++++------ api/poetry.lock | 8 ++++---- api/pyproject.toml | 2 +- .../model_runtime/__mock/xinference.py | 5 +---- 5 files changed, 13 insertions(+), 17 deletions(-) diff --git a/api/core/model_runtime/model_providers/xinference/llm/llm.py b/api/core/model_runtime/model_providers/xinference/llm/llm.py index 4fadda5df5..286640079b 100644 --- a/api/core/model_runtime/model_providers/xinference/llm/llm.py +++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py @@ -19,7 +19,6 @@ from openai.types.chat.chat_completion_message import FunctionCall from openai.types.completion import Completion from xinference_client.client.restful.restful_client import ( Client, - RESTfulChatglmCppChatModelHandle, RESTfulChatModelHandle, RESTfulGenerateModelHandle, ) @@ -491,7 +490,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel): if tools and len(tools) > 0: generate_config["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools] vision = credentials.get("support_vision", False) - if isinstance(xinference_model, RESTfulChatModelHandle | RESTfulChatglmCppChatModelHandle): + if isinstance(xinference_model, RESTfulChatModelHandle): resp = client.chat.completions.create( model=credentials["model_uid"], messages=[self._convert_prompt_message_to_dict(message) for message in prompt_messages], diff --git 
a/api/core/model_runtime/model_providers/xinference/tts/tts.py b/api/core/model_runtime/model_providers/xinference/tts/tts.py index 10538b5788..81dbe397d2 100644 --- a/api/core/model_runtime/model_providers/xinference/tts/tts.py +++ b/api/core/model_runtime/model_providers/xinference/tts/tts.py @@ -208,21 +208,21 @@ class XinferenceText2SpeechModel(TTSModel): executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences))) futures = [ executor.submit( - handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=False + handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=True ) for i in range(len(sentences)) ] for future in futures: response = future.result() - for i in range(0, len(response), 1024): - yield response[i : i + 1024] + for chunk in response: + yield chunk else: response = handle.speech( - input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=False + input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=True ) - for i in range(0, len(response), 1024): - yield response[i : i + 1024] + for chunk in response: + yield chunk except Exception as ex: raise InvokeBadRequestError(str(ex)) diff --git a/api/poetry.lock b/api/poetry.lock index 8d4d680031..e532394a24 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -10014,13 +10014,13 @@ h11 = ">=0.9.0,<1" [[package]] name = "xinference-client" -version = "0.13.3" +version = "0.15.2" description = "Client for Xinference" optional = false python-versions = "*" files = [ - {file = "xinference-client-0.13.3.tar.gz", hash = "sha256:822b722100affdff049c27760be7d62ac92de58c87a40d3361066df446ba648f"}, - {file = "xinference_client-0.13.3-py3-none-any.whl", hash = "sha256:f0eff3858b1ebcef2129726f82b09259c177e11db466a7ca23def3d4849c419f"}, + {file = "xinference-client-0.15.2.tar.gz", hash = "sha256:5c2259bb133148d1cc9bd2b8ec6eb8b5bbeba7f11d6252959f4e6cd79baa53ed"}, + {file = "xinference_client-0.15.2-py3-none-any.whl", hash = "sha256:b6275adab695e75e75a33e21e0ad212488fc2d5a4d0f693d544c0e78469abbe3"}, ] [package.dependencies] @@ -10422,4 +10422,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "18924ae12a00bde4438a46168bc167ed69613ab1ab0c387f193cd47ac24379b2" +content-hash = "85aa4be7defee8fe6622cf95ba03e81895121502ebf6d666d6ce376ff019fac7" diff --git a/api/pyproject.toml b/api/pyproject.toml index 829c39a12b..1f483fc49f 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -203,7 +203,7 @@ transformers = "~4.35.0" unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] } websocket-client = "~1.7.0" werkzeug = "~3.0.1" -xinference-client = "0.13.3" +xinference-client = "0.15.2" yarl = "~1.9.4" zhipuai = "1.0.7" # Before adding new dependency, consider place it in alphabet order (a-z) and suitable group. 
diff --git a/api/tests/integration_tests/model_runtime/__mock/xinference.py b/api/tests/integration_tests/model_runtime/__mock/xinference.py index 299523f4f5..8deb50635f 100644 --- a/api/tests/integration_tests/model_runtime/__mock/xinference.py +++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py @@ -9,7 +9,6 @@ from requests.exceptions import ConnectionError from requests.sessions import Session from xinference_client.client.restful.restful_client import ( Client, - RESTfulChatglmCppChatModelHandle, RESTfulChatModelHandle, RESTfulEmbeddingModelHandle, RESTfulGenerateModelHandle, @@ -19,9 +18,7 @@ from xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage class MockXinferenceClass: - def get_chat_model( - self: Client, model_uid: str - ) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]: + def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulGenerateModelHandle, RESTfulChatModelHandle]: if not re.match(r"https?:\/\/[^\s\/$.?#].[^\s]*$", self.base_url): raise RuntimeError("404 Not Found") From 0665268578599ca340ff134eec041f17364ef937 Mon Sep 17 00:00:00 2001 From: ice yao Date: Sun, 22 Sep 2024 10:13:00 +0800 Subject: [PATCH 27/40] Add Fireworks AI as new model provider (#8428) --- .../model_providers/_position.yaml | 1 + .../model_providers/fireworks/__init__.py | 0 .../fireworks/_assets/icon_l_en.svg | 3 + .../fireworks/_assets/icon_s_en.svg | 5 + .../model_providers/fireworks/_common.py | 52 ++ .../model_providers/fireworks/fireworks.py | 27 + .../model_providers/fireworks/fireworks.yaml | 29 + .../model_providers/fireworks/llm/__init__.py | 0 .../fireworks/llm/_position.yaml | 16 + .../fireworks/llm/firefunction-v1.yaml | 46 ++ .../fireworks/llm/firefunction-v2.yaml | 46 ++ .../fireworks/llm/gemma2-9b-it.yaml | 45 ++ .../llm/llama-v3-70b-instruct-hf.yaml | 46 ++ .../fireworks/llm/llama-v3-70b-instruct.yaml | 46 ++ .../llm/llama-v3-8b-instruct-hf.yaml | 46 ++ .../fireworks/llm/llama-v3-8b-instruct.yaml | 46 ++ .../llm/llama-v3p1-405b-instruct.yaml | 46 ++ .../llm/llama-v3p1-70b-instruct.yaml | 46 ++ .../fireworks/llm/llama-v3p1-8b-instruct.yaml | 46 ++ .../model_providers/fireworks/llm/llm.py | 610 ++++++++++++++++++ .../fireworks/llm/mixtral-8x22b-instruct.yaml | 46 ++ .../llm/mixtral-8x7b-instruct-hf.yaml | 46 ++ .../fireworks/llm/mixtral-8x7b-instruct.yaml | 46 ++ .../fireworks/llm/mythomax-l2-13b.yaml | 46 ++ .../llm/phi-3-vision-128k-instruct.yaml | 46 ++ .../fireworks/llm/yi-large.yaml | 45 ++ api/pyproject.toml | 1 + .../model_runtime/fireworks/__init__.py | 0 .../model_runtime/fireworks/test_llm.py | 186 ++++++ .../model_runtime/fireworks/test_provider.py | 17 + dev/pytest/pytest_model_runtime.sh | 4 +- 31 files changed, 1683 insertions(+), 2 deletions(-) create mode 100644 api/core/model_runtime/model_providers/fireworks/__init__.py create mode 100644 api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg create mode 100644 api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg create mode 100644 api/core/model_runtime/model_providers/fireworks/_common.py create mode 100644 api/core/model_runtime/model_providers/fireworks/fireworks.py create mode 100644 api/core/model_runtime/model_providers/fireworks/fireworks.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/__init__.py create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/_position.yaml create mode 100644 
api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/llm.py create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml create mode 100644 api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml create mode 100644 api/tests/integration_tests/model_runtime/fireworks/__init__.py create mode 100644 api/tests/integration_tests/model_runtime/fireworks/test_llm.py create mode 100644 api/tests/integration_tests/model_runtime/fireworks/test_provider.py diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml index d10314ba03..1f5f64019a 100644 --- a/api/core/model_runtime/model_providers/_position.yaml +++ b/api/core/model_runtime/model_providers/_position.yaml @@ -37,3 +37,4 @@ - siliconflow - perfxcloud - zhinao +- fireworks diff --git a/api/core/model_runtime/model_providers/fireworks/__init__.py b/api/core/model_runtime/model_providers/fireworks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg b/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg new file mode 100644 index 0000000000..582605cc42 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/_assets/icon_l_en.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg new file mode 100644 index 0000000000..86eeba66f9 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/_assets/icon_s_en.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/api/core/model_runtime/model_providers/fireworks/_common.py b/api/core/model_runtime/model_providers/fireworks/_common.py new file mode 100644 index 0000000000..378ced3a40 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/_common.py @@ -0,0 +1,52 @@ +from collections.abc import Mapping + +import openai + +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + 
InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +class _CommonFireworks: + def _to_credential_kwargs(self, credentials: Mapping) -> dict: + """ + Transform credentials to kwargs for model instance + + :param credentials: + :return: + """ + credentials_kwargs = { + "api_key": credentials["fireworks_api_key"], + "base_url": "https://api.fireworks.ai/inference/v1", + "max_retries": 1, + } + + return credentials_kwargs + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. + + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError], + InvokeServerUnavailableError: [openai.InternalServerError], + InvokeRateLimitError: [openai.RateLimitError], + InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError], + InvokeBadRequestError: [ + openai.BadRequestError, + openai.NotFoundError, + openai.UnprocessableEntityError, + openai.APIError, + ], + } diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.py b/api/core/model_runtime/model_providers/fireworks/fireworks.py new file mode 100644 index 0000000000..15f25badab --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/fireworks.py @@ -0,0 +1,27 @@ +import logging + +from core.model_runtime.entities.model_entities import ModelType +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.model_provider import ModelProvider + +logger = logging.getLogger(__name__) + + +class FireworksProvider(ModelProvider): + def validate_provider_credentials(self, credentials: dict) -> None: + """ + Validate provider credentials + if validate failed, raise exception + + :param credentials: provider credentials, credentials form defined in `provider_credential_schema`. 
+ """ + try: + model_instance = self.get_model_instance(ModelType.LLM) + model_instance.validate_credentials( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", credentials=credentials + ) + except CredentialsValidateFailedError as ex: + raise ex + except Exception as ex: + logger.exception(f"{self.get_provider_schema().provider} credentials validate failed") + raise ex diff --git a/api/core/model_runtime/model_providers/fireworks/fireworks.yaml b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml new file mode 100644 index 0000000000..f886fa23b5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/fireworks.yaml @@ -0,0 +1,29 @@ +provider: fireworks +label: + zh_Hans: Fireworks AI + en_US: Fireworks AI +icon_small: + en_US: icon_s_en.svg +icon_large: + en_US: icon_l_en.svg +background: "#FCFDFF" +help: + title: + en_US: Get your API Key from Fireworks AI + zh_Hans: 从 Fireworks AI 获取 API Key + url: + en_US: https://fireworks.ai/account/api-keys +supported_model_types: + - llm +configurate_methods: + - predefined-model +provider_credential_schema: + credential_form_schemas: + - variable: fireworks_api_key + label: + en_US: API Key + type: secret-input + required: true + placeholder: + zh_Hans: 在此输入您的 API Key + en_US: Enter your API Key diff --git a/api/core/model_runtime/model_providers/fireworks/llm/__init__.py b/api/core/model_runtime/model_providers/fireworks/llm/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml b/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml new file mode 100644 index 0000000000..9f7c1af68c --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/_position.yaml @@ -0,0 +1,16 @@ +- llama-v3p1-405b-instruct +- llama-v3p1-70b-instruct +- llama-v3p1-8b-instruct +- llama-v3-70b-instruct +- mixtral-8x22b-instruct +- mixtral-8x7b-instruct +- firefunction-v2 +- firefunction-v1 +- gemma2-9b-it +- llama-v3-70b-instruct-hf +- llama-v3-8b-instruct +- llama-v3-8b-instruct-hf +- mixtral-8x7b-instruct-hf +- mythomax-l2-13b +- phi-3-vision-128k-instruct +- yi-large diff --git a/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml new file mode 100644 index 0000000000..f6bac12832 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v1.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/firefunction-v1 +label: + zh_Hans: Firefunction V1 + en_US: Firefunction V1 +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.5' + output: '0.5' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml new file mode 100644 index 0000000000..2979cb46d5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/firefunction-v2.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/firefunction-v2 +label: + zh_Hans: Firefunction V2 + en_US: Firefunction V2 +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml b/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml new file mode 100644 index 0000000000..ee41a7e2fd --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/gemma2-9b-it.yaml @@ -0,0 +1,45 @@ +model: accounts/fireworks/models/gemma2-9b-it +label: + zh_Hans: Gemma2 9B Instruct + en_US: Gemma2 9B Instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml new file mode 100644 index 0000000000..2ae89b8816 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct-hf.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-70b-instruct-hf +label: + zh_Hans: Llama3 70B Instruct(HF version) + en_US: Llama3 70B Instruct(HF version) +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml new file mode 100644 index 0000000000..7c24b08ca5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-70b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-70b-instruct +label: + zh_Hans: Llama3 70B Instruct + en_US: Llama3 70B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.9' + output: '0.9' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml new file mode 100644 index 0000000000..83507ef3e5 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct-hf.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-8b-instruct-hf +label: + zh_Hans: Llama3 8B Instruct(HF version) + en_US: Llama3 8B Instruct(HF version) +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml new file mode 100644 index 0000000000..d8ac9537b8 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3-8b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3-8b-instruct +label: + zh_Hans: Llama3 8B Instruct + en_US: Llama3 8B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml new file mode 100644 index 0000000000..c4ddb3e924 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-405b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p1-405b-instruct +label: + zh_Hans: Llama3.1 405B Instruct + en_US: Llama3.1 405B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '3' + output: '3' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml new file mode 100644 index 0000000000..62f84f87fa --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-70b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p1-70b-instruct +label: + zh_Hans: Llama3.1 70B Instruct + en_US: Llama3.1 70B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml new file mode 100644 index 0000000000..9bb99c91b6 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llama-v3p1-8b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/llama-v3p1-8b-instruct +label: + zh_Hans: Llama3.1 8B Instruct + en_US: Llama3.1 8B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/llm.py b/api/core/model_runtime/model_providers/fireworks/llm/llm.py new file mode 100644 index 0000000000..2dcf1adba6 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/llm.py @@ -0,0 +1,610 @@ +import logging +from collections.abc import Generator +from typing import Optional, Union, cast + +from openai import OpenAI, Stream +from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall +from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall +from openai.types.chat.chat_completion_message import FunctionCall + +from core.model_runtime.callbacks.base_callback import Callback +from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + PromptMessage, + PromptMessageContentType, + PromptMessageTool, + SystemPromptMessage, + TextPromptMessageContent, + ToolPromptMessage, + UserPromptMessage, +) +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel +from core.model_runtime.model_providers.fireworks._common import _CommonFireworks + +logger = logging.getLogger(__name__) + +FIREWORKS_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object. +The structure of the {{block}} object you can found in the instructions, use {"answer": "$your_answer"} as the default structure +if you are not sure about the structure. 
+ + +{{instructions}} + +""" # noqa: E501 + + +class FireworksLargeLanguageModel(_CommonFireworks, LargeLanguageModel): + """ + Model class for Fireworks large language model. + """ + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + """ + Invoke large language model + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param model_parameters: model parameters + :param tools: tools for tool calling + :param stop: stop words + :param stream: is stream response + :param user: unique user id + :return: full response or stream response chunk generator result + """ + + return self._chat_generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def _code_block_mode_wrapper( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + callbacks: Optional[list[Callback]] = None, + ) -> Union[LLMResult, Generator]: + """ + Code block mode wrapper for invoking large language model + """ + if "response_format" in model_parameters and model_parameters["response_format"] in {"JSON", "XML"}: + stop = stop or [] + self._transform_chat_json_prompts( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + response_format=model_parameters["response_format"], + ) + model_parameters.pop("response_format") + + return self._invoke( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def _transform_chat_json_prompts( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, + stream: bool = True, + user: str | None = None, + response_format: str = "JSON", + ) -> None: + """ + Transform json prompts + """ + if stop is None: + stop = [] + if "```\n" not in stop: + stop.append("```\n") + if "\n```" not in stop: + stop.append("\n```") + + if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage): + prompt_messages[0] = SystemPromptMessage( + content=FIREWORKS_BLOCK_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content).replace( + "{{block}}", response_format + ) + ) + prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n")) + else: + prompt_messages.insert( + 0, + SystemPromptMessage( + content=FIREWORKS_BLOCK_MODE_PROMPT.replace( + "{{instructions}}", f"Please output a valid {response_format} object." 
+ ).replace("{{block}}", response_format) + ), + ) + prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}")) + + def get_num_tokens( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> int: + """ + Get number of tokens for given prompt messages + + :param model: model name + :param credentials: model credentials + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: + """ + return self._num_tokens_from_messages(model, prompt_messages, tools) + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + client.chat.completions.create( + messages=[{"role": "user", "content": "ping"}], model=model, temperature=0, max_tokens=10, stream=False + ) + except Exception as e: + raise CredentialsValidateFailedError(str(e)) + + def _chat_generate( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: Optional[list[PromptMessageTool]] = None, + stop: Optional[list[str]] = None, + stream: bool = True, + user: Optional[str] = None, + ) -> Union[LLMResult, Generator]: + credentials_kwargs = self._to_credential_kwargs(credentials) + client = OpenAI(**credentials_kwargs) + + extra_model_kwargs = {} + + if tools: + extra_model_kwargs["functions"] = [ + {"name": tool.name, "description": tool.description, "parameters": tool.parameters} for tool in tools + ] + + if stop: + extra_model_kwargs["stop"] = stop + + if user: + extra_model_kwargs["user"] = user + + # chat model + response = client.chat.completions.create( + messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages], + model=model, + stream=stream, + **model_parameters, + **extra_model_kwargs, + ) + + if stream: + return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools) + return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools) + + def _handle_chat_generate_response( + self, + model: str, + credentials: dict, + response: ChatCompletion, + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> LLMResult: + """ + Handle llm chat response + + :param model: model name + :param credentials: credentials + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response + """ + assistant_message = response.choices[0].message + # assistant_message_tool_calls = assistant_message.tool_calls + assistant_message_function_call = assistant_message.function_call + + # extract tool calls from response + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call(assistant_message_function_call) + tool_calls = [function_call] if function_call else [] + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage(content=assistant_message.content, tool_calls=tool_calls) + + # calculate num tokens + if response.usage: + # transform usage + prompt_tokens = response.usage.prompt_tokens + completion_tokens = response.usage.completion_tokens + else: + # calculate num tokens + prompt_tokens = 
self._num_tokens_from_messages(model, prompt_messages, tools) + completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message]) + + # transform usage + usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) + + # transform response + response = LLMResult( + model=response.model, + prompt_messages=prompt_messages, + message=assistant_prompt_message, + usage=usage, + system_fingerprint=response.system_fingerprint, + ) + + return response + + def _handle_chat_generate_stream_response( + self, + model: str, + credentials: dict, + response: Stream[ChatCompletionChunk], + prompt_messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + ) -> Generator: + """ + Handle llm chat stream response + + :param model: model name + :param response: response + :param prompt_messages: prompt messages + :param tools: tools for tool calling + :return: llm response chunk generator + """ + full_assistant_content = "" + delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None + prompt_tokens = 0 + completion_tokens = 0 + final_tool_calls = [] + final_chunk = LLMResultChunk( + model=model, + prompt_messages=prompt_messages, + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage(content=""), + ), + ) + + for chunk in response: + if len(chunk.choices) == 0: + if chunk.usage: + # calculate num tokens + prompt_tokens = chunk.usage.prompt_tokens + completion_tokens = chunk.usage.completion_tokens + continue + + delta = chunk.choices[0] + has_finish_reason = delta.finish_reason is not None + + if ( + not has_finish_reason + and (delta.delta.content is None or delta.delta.content == "") + and delta.delta.function_call is None + ): + continue + + # assistant_message_tool_calls = delta.delta.tool_calls + assistant_message_function_call = delta.delta.function_call + + # extract tool calls from response + if delta_assistant_message_function_call_storage is not None: + # handle process of stream function call + if assistant_message_function_call: + # message has not ended ever + delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments + continue + else: + # message has ended + assistant_message_function_call = delta_assistant_message_function_call_storage + delta_assistant_message_function_call_storage = None + else: + if assistant_message_function_call: + # start of stream function call + delta_assistant_message_function_call_storage = assistant_message_function_call + if delta_assistant_message_function_call_storage.arguments is None: + delta_assistant_message_function_call_storage.arguments = "" + if not has_finish_reason: + continue + + # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls) + function_call = self._extract_response_function_call(assistant_message_function_call) + tool_calls = [function_call] if function_call else [] + if tool_calls: + final_tool_calls.extend(tool_calls) + + # transform assistant message to prompt message + assistant_prompt_message = AssistantPromptMessage(content=delta.delta.content or "", tool_calls=tool_calls) + + full_assistant_content += delta.delta.content or "" + + if has_finish_reason: + final_chunk = LLMResultChunk( + model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + finish_reason=delta.finish_reason, + ), + ) + else: + yield LLMResultChunk( + 
model=chunk.model, + prompt_messages=prompt_messages, + system_fingerprint=chunk.system_fingerprint, + delta=LLMResultChunkDelta( + index=delta.index, + message=assistant_prompt_message, + ), + ) + + if not prompt_tokens: + prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools) + + if not completion_tokens: + full_assistant_prompt_message = AssistantPromptMessage( + content=full_assistant_content, tool_calls=final_tool_calls + ) + completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message]) + + # transform usage + usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) + final_chunk.delta.usage = usage + + yield final_chunk + + def _extract_response_tool_calls( + self, response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall] + ) -> list[AssistantPromptMessage.ToolCall]: + """ + Extract tool calls from response + + :param response_tool_calls: response tool calls + :return: list of tool calls + """ + tool_calls = [] + if response_tool_calls: + for response_tool_call in response_tool_calls: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_tool_call.function.name, arguments=response_tool_call.function.arguments + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_tool_call.id, type=response_tool_call.type, function=function + ) + tool_calls.append(tool_call) + + return tool_calls + + def _extract_response_function_call( + self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall + ) -> AssistantPromptMessage.ToolCall: + """ + Extract function call from response + + :param response_function_call: response function call + :return: tool call + """ + tool_call = None + if response_function_call: + function = AssistantPromptMessage.ToolCall.ToolCallFunction( + name=response_function_call.name, arguments=response_function_call.arguments + ) + + tool_call = AssistantPromptMessage.ToolCall( + id=response_function_call.name, type="function", function=function + ) + + return tool_call + + def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: + """ + Convert PromptMessage to dict for Fireworks API + """ + if isinstance(message, UserPromptMessage): + message = cast(UserPromptMessage, message) + if isinstance(message.content, str): + message_dict = {"role": "user", "content": message.content} + else: + sub_messages = [] + for message_content in message.content: + if message_content.type == PromptMessageContentType.TEXT: + message_content = cast(TextPromptMessageContent, message_content) + sub_message_dict = {"type": "text", "text": message_content.data} + sub_messages.append(sub_message_dict) + elif message_content.type == PromptMessageContentType.IMAGE: + message_content = cast(ImagePromptMessageContent, message_content) + sub_message_dict = { + "type": "image_url", + "image_url": {"url": message_content.data, "detail": message_content.detail.value}, + } + sub_messages.append(sub_message_dict) + + message_dict = {"role": "user", "content": sub_messages} + elif isinstance(message, AssistantPromptMessage): + message = cast(AssistantPromptMessage, message) + message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + # message_dict["tool_calls"] = [tool_call.dict() for tool_call in + # message.tool_calls] + function_call = message.tool_calls[0] + message_dict["function_call"] = { + "name": function_call.function.name, + "arguments": function_call.function.arguments, + } + elif isinstance(message, 
SystemPromptMessage): + message = cast(SystemPromptMessage, message) + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, ToolPromptMessage): + message = cast(ToolPromptMessage, message) + # message_dict = { + # "role": "tool", + # "content": message.content, + # "tool_call_id": message.tool_call_id + # } + message_dict = {"role": "function", "content": message.content, "name": message.tool_call_id} + else: + raise ValueError(f"Got unknown type {message}") + + if message.name: + message_dict["name"] = message.name + + return message_dict + + def _num_tokens_from_messages( + self, + model: str, + messages: list[PromptMessage], + tools: Optional[list[PromptMessageTool]] = None, + credentials: dict = None, + ) -> int: + """ + Approximate num tokens with GPT2 tokenizer. + """ + + tokens_per_message = 3 + tokens_per_name = 1 + + num_tokens = 0 + messages_dict = [self._convert_prompt_message_to_dict(m) for m in messages] + for message in messages_dict: + num_tokens += tokens_per_message + for key, value in message.items(): + # Cast str(value) in case the message value is not a string + # This occurs with function messages + # TODO: The current token calculation method for the image type is not implemented, + # which need to download the image and then get the resolution for calculation, + # and will increase the request delay + if isinstance(value, list): + text = "" + for item in value: + if isinstance(item, dict) and item["type"] == "text": + text += item["text"] + + value = text + + if key == "tool_calls": + for tool_call in value: + for t_key, t_value in tool_call.items(): + num_tokens += self._get_num_tokens_by_gpt2(t_key) + if t_key == "function": + for f_key, f_value in t_value.items(): + num_tokens += self._get_num_tokens_by_gpt2(f_key) + num_tokens += self._get_num_tokens_by_gpt2(f_value) + else: + num_tokens += self._get_num_tokens_by_gpt2(t_key) + num_tokens += self._get_num_tokens_by_gpt2(t_value) + else: + num_tokens += self._get_num_tokens_by_gpt2(str(value)) + + if key == "name": + num_tokens += tokens_per_name + + # every reply is primed with assistant + num_tokens += 3 + + if tools: + num_tokens += self._num_tokens_for_tools(tools) + + return num_tokens + + def _num_tokens_for_tools(self, tools: list[PromptMessageTool]) -> int: + """ + Calculate num tokens for tool calling with tiktoken package. 
+ + :param tools: tools for tool calling + :return: number of tokens + """ + num_tokens = 0 + for tool in tools: + num_tokens += self._get_num_tokens_by_gpt2("type") + num_tokens += self._get_num_tokens_by_gpt2("function") + num_tokens += self._get_num_tokens_by_gpt2("function") + + # calculate num tokens for function object + num_tokens += self._get_num_tokens_by_gpt2("name") + num_tokens += self._get_num_tokens_by_gpt2(tool.name) + num_tokens += self._get_num_tokens_by_gpt2("description") + num_tokens += self._get_num_tokens_by_gpt2(tool.description) + parameters = tool.parameters + num_tokens += self._get_num_tokens_by_gpt2("parameters") + if "title" in parameters: + num_tokens += self._get_num_tokens_by_gpt2("title") + num_tokens += self._get_num_tokens_by_gpt2(parameters.get("title")) + num_tokens += self._get_num_tokens_by_gpt2("type") + num_tokens += self._get_num_tokens_by_gpt2(parameters.get("type")) + if "properties" in parameters: + num_tokens += self._get_num_tokens_by_gpt2("properties") + for key, value in parameters.get("properties").items(): + num_tokens += self._get_num_tokens_by_gpt2(key) + for field_key, field_value in value.items(): + num_tokens += self._get_num_tokens_by_gpt2(field_key) + if field_key == "enum": + for enum_field in field_value: + num_tokens += 3 + num_tokens += self._get_num_tokens_by_gpt2(enum_field) + else: + num_tokens += self._get_num_tokens_by_gpt2(field_key) + num_tokens += self._get_num_tokens_by_gpt2(str(field_value)) + if "required" in parameters: + num_tokens += self._get_num_tokens_by_gpt2("required") + for required_field in parameters["required"]: + num_tokens += 3 + num_tokens += self._get_num_tokens_by_gpt2(required_field) + + return num_tokens diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml new file mode 100644 index 0000000000..87d977e26c --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x22b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mixtral-8x22b-instruct +label: + zh_Hans: Mixtral MoE 8x22B Instruct + en_US: Mixtral MoE 8x22B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 65536 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '1.2' + output: '1.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml new file mode 100644 index 0000000000..e3d5a90858 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct-hf.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mixtral-8x7b-instruct-hf +label: + zh_Hans: Mixtral MoE 8x7B Instruct(HF version) + en_US: Mixtral MoE 8x7B Instruct(HF version) +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.5' + output: '0.5' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml new file mode 100644 index 0000000000..45f632ceff --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mixtral-8x7b-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mixtral-8x7b-instruct +label: + zh_Hans: Mixtral MoE 8x7B Instruct + en_US: Mixtral MoE 8x7B Instruct +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.5' + output: '0.5' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml b/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml new file mode 100644 index 0000000000..9c3486ba10 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/mythomax-l2-13b.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/mythomax-l2-13b +label: + zh_Hans: MythoMax L2 13b + en_US: MythoMax L2 13b +model_type: llm +features: + - agent-thought + - tool-call +model_properties: + mode: chat + context_size: 4096 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml b/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml new file mode 100644 index 0000000000..e399f2edb1 --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/phi-3-vision-128k-instruct.yaml @@ -0,0 +1,46 @@ +model: accounts/fireworks/models/phi-3-vision-128k-instruct +label: + zh_Hans: Phi3.5 Vision Instruct + en_US: Phi3.5 Vision Instruct +model_type: llm +features: + - agent-thought + - vision +model_properties: + mode: chat + context_size: 8192 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. 
+ - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '0.2' + output: '0.2' + unit: '0.000001' + currency: USD diff --git a/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml b/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml new file mode 100644 index 0000000000..bb4b6f994e --- /dev/null +++ b/api/core/model_runtime/model_providers/fireworks/llm/yi-large.yaml @@ -0,0 +1,45 @@ +model: accounts/yi-01-ai/models/yi-large +label: + zh_Hans: Yi-Large + en_US: Yi-Large +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32768 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + - name: max_tokens + use_template: max_tokens + - name: context_length_exceeded_behavior + default: None + label: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + help: + zh_Hans: 上下文长度超出行为 + en_US: Context Length Exceeded Behavior + type: string + options: + - None + - truncate + - error + - name: response_format + use_template: response_format +pricing: + input: '3' + output: '3' + unit: '0.000001' + currency: USD diff --git a/api/pyproject.toml b/api/pyproject.toml index 1f483fc49f..93482b032d 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -100,6 +100,7 @@ exclude = [ [tool.pytest_env] OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii" UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa" +FIREWORKS_API_KEY = "fw_aaaaaaaaaaaaaaaaaaaa" AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com" AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94" ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz" diff --git a/api/tests/integration_tests/model_runtime/fireworks/__init__.py b/api/tests/integration_tests/model_runtime/fireworks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_llm.py b/api/tests/integration_tests/model_runtime/fireworks/test_llm.py new file mode 100644 index 0000000000..699ca293a2 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/fireworks/test_llm.py @@ -0,0 +1,186 @@ +import os +from collections.abc import Generator + +import pytest + +from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessageTool, + SystemPromptMessage, + UserPromptMessage, +) +from core.model_runtime.entities.model_entities import AIModelEntity +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.fireworks.llm.llm import FireworksLargeLanguageModel + +"""FOR MOCK FIXTURES, DO NOT REMOVE""" +from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock + + +def test_predefined_models(): + model = FireworksLargeLanguageModel() + model_schemas = model.predefined_models() + + assert len(model_schemas) >= 1 + 
assert isinstance(model_schemas[0], AIModelEntity) + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_validate_credentials_for_chat_model(setup_openai_mock): + model = FireworksLargeLanguageModel() + + with pytest.raises(CredentialsValidateFailedError): + # model name to gpt-3.5-turbo because of mocking + model.validate_credentials(model="gpt-3.5-turbo", credentials={"fireworks_api_key": "invalid_key"}) + + model.validate_credentials( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + ) + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_invoke_chat_model(setup_openai_mock): + model = FireworksLargeLanguageModel() + + result = model.invoke( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage(content="Hello World!"), + ], + model_parameters={ + "temperature": 0.0, + "top_p": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "max_tokens": 10, + }, + stop=["How"], + stream=False, + user="foo", + ) + + assert isinstance(result, LLMResult) + assert len(result.message.content) > 0 + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_invoke_chat_model_with_tools(setup_openai_mock): + model = FireworksLargeLanguageModel() + + result = model.invoke( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage( + content="what's the weather today in London?", + ), + ], + model_parameters={"temperature": 0.0, "max_tokens": 100}, + tools=[ + PromptMessageTool( + name="get_weather", + description="Determine weather in my location", + parameters={ + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state e.g. 
San Francisco, CA"}, + "unit": {"type": "string", "enum": ["c", "f"]}, + }, + "required": ["location"], + }, + ), + PromptMessageTool( + name="get_stock_price", + description="Get the current stock price", + parameters={ + "type": "object", + "properties": {"symbol": {"type": "string", "description": "The stock symbol"}}, + "required": ["symbol"], + }, + ), + ], + stream=False, + user="foo", + ) + + assert isinstance(result, LLMResult) + assert isinstance(result.message, AssistantPromptMessage) + assert len(result.message.tool_calls) > 0 + + +@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True) +def test_invoke_stream_chat_model(setup_openai_mock): + model = FireworksLargeLanguageModel() + + result = model.invoke( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage(content="Hello World!"), + ], + model_parameters={"temperature": 0.0, "max_tokens": 100}, + stream=True, + user="foo", + ) + + assert isinstance(result, Generator) + + for chunk in result: + assert isinstance(chunk, LLMResultChunk) + assert isinstance(chunk.delta, LLMResultChunkDelta) + assert isinstance(chunk.delta.message, AssistantPromptMessage) + assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True + if chunk.delta.finish_reason is not None: + assert chunk.delta.usage is not None + assert chunk.delta.usage.completion_tokens > 0 + + +def test_get_num_tokens(): + model = FireworksLargeLanguageModel() + + num_tokens = model.get_num_tokens( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[UserPromptMessage(content="Hello World!")], + ) + + assert num_tokens == 10 + + num_tokens = model.get_num_tokens( + model="accounts/fireworks/models/llama-v3p1-8b-instruct", + credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")}, + prompt_messages=[ + SystemPromptMessage( + content="You are a helpful AI assistant.", + ), + UserPromptMessage(content="Hello World!"), + ], + tools=[ + PromptMessageTool( + name="get_weather", + description="Determine weather in my location", + parameters={ + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state e.g. 
+                        "unit": {"type": "string", "enum": ["c", "f"]},
+                    },
+                    "required": ["location"],
+                },
+            ),
+        ],
+    )
+
+    assert num_tokens == 77
diff --git a/api/tests/integration_tests/model_runtime/fireworks/test_provider.py b/api/tests/integration_tests/model_runtime/fireworks/test_provider.py
new file mode 100644
index 0000000000..a68cf1a1a8
--- /dev/null
+++ b/api/tests/integration_tests/model_runtime/fireworks/test_provider.py
@@ -0,0 +1,17 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.fireworks.fireworks import FireworksProvider
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize("setup_openai_mock", [["chat"]], indirect=True)
+def test_validate_provider_credentials(setup_openai_mock):
+    provider = FireworksProvider()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        provider.validate_provider_credentials(credentials={})
+
+    provider.validate_provider_credentials(credentials={"fireworks_api_key": os.environ.get("FIREWORKS_API_KEY")})
diff --git a/dev/pytest/pytest_model_runtime.sh b/dev/pytest/pytest_model_runtime.sh
index aba13292ab..4c1c6bf4f3 100755
--- a/dev/pytest/pytest_model_runtime.sh
+++ b/dev/pytest/pytest_model_runtime.sh
@@ -6,5 +6,5 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
   api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
   api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
   api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
-  api/tests/integration_tests/model_runtime/upstage
-
+  api/tests/integration_tests/model_runtime/upstage \
+  api/tests/integration_tests/model_runtime/fireworks

From 740fad06c1fe9a5a24a6afdd7174807b82bbb464 Mon Sep 17 00:00:00 2001
From: Waffle <52460705+ox01024@users.noreply.github.com>
Date: Sun, 22 Sep 2024 10:14:14 +0800
Subject: [PATCH 28/40] feat(tools/cogview): Updated cogview tool to support cogview-3 and the latest cogview-3-plus (#8382)

---
 .../zhipuai/zhipuai_sdk/__init__.py | 3 +-
 .../zhipuai/zhipuai_sdk/__version__.py | 2 +-
 .../zhipuai/zhipuai_sdk/_client.py | 29 +-
 .../zhipuai_sdk/api_resource/__init__.py | 35 +-
 .../api_resource/assistant/__init__.py | 3 +
 .../api_resource/assistant/assistant.py | 122 +++
 .../zhipuai_sdk/api_resource/batches.py | 146 ++++
 .../zhipuai_sdk/api_resource/chat/__init__.py | 5 +
 .../api_resource/chat/async_completions.py | 91 ++-
 .../zhipuai_sdk/api_resource/chat/chat.py | 15 +-
 .../api_resource/chat/completions.py | 80 +-
 .../zhipuai_sdk/api_resource/embeddings.py | 13 +-
 .../zhipuai/zhipuai_sdk/api_resource/files.py | 167 +++-
 .../api_resource/fine_tuning/__init__.py | 5 +
 .../api_resource/fine_tuning/fine_tuning.py | 15 +-
 .../api_resource/fine_tuning/jobs/__init__.py | 3 +
 .../fine_tuning/{ => jobs}/jobs.py | 68 +-
 .../fine_tuning/models/__init__.py | 3 +
 .../fine_tuning/models/fine_tuned_models.py | 41 +
 .../zhipuai_sdk/api_resource/images.py | 13 +-
 .../api_resource/knowledge/__init__.py | 3 +
 .../knowledge/document/__init__.py | 3 +
 .../knowledge/document/document.py | 217 +++++
 .../api_resource/knowledge/knowledge.py | 173 ++++
 .../api_resource/tools/__init__.py | 3 +
 .../zhipuai_sdk/api_resource/tools/tools.py | 65 ++
 .../api_resource/videos/__init__.py | 7 +
 .../zhipuai_sdk/api_resource/videos/videos.py | 77 ++
 .../zhipuai/zhipuai_sdk/core/__init__.py | 108 +++
 .../zhipuai/zhipuai_sdk/core/_base_api.py | 1 +
 .../zhipuai/zhipuai_sdk/core/_base_compat.py | 209 +++++
 .../zhipuai/zhipuai_sdk/core/_base_models.py | 671 ++++++++++++++++
 .../zhipuai/zhipuai_sdk/core/_base_type.py | 99 ++-
 .../zhipuai/zhipuai_sdk/core/_constants.py | 12 +
 .../zhipuai/zhipuai_sdk/core/_errors.py | 17 +-
 .../zhipuai/zhipuai_sdk/core/_files.py | 86 +-
 .../zhipuai/zhipuai_sdk/core/_http_client.py | 758 +++++++++++++++---
 .../zhipuai/zhipuai_sdk/core/_jwt_token.py | 6 +-
 .../core/_legacy_binary_response.py | 207 +++++
 .../zhipuai_sdk/core/_legacy_response.py | 341 ++++++++
 .../zhipuai/zhipuai_sdk/core/_request_opt.py | 91 ++-
 .../zhipuai/zhipuai_sdk/core/_response.py | 379 +++++++--
 .../zhipuai/zhipuai_sdk/core/_sse_client.py | 68 +-
 .../zhipuai/zhipuai_sdk/core/_utils.py | 19 -
 .../zhipuai_sdk/core/_utils/__init__.py | 52 ++
 .../zhipuai_sdk/core/_utils/_transform.py | 383 +++++++++
 .../zhipuai_sdk/core/_utils/_typing.py | 122 +++
 .../zhipuai/zhipuai_sdk/core/_utils/_utils.py | 409 ++++++++++
 .../zhipuai/zhipuai_sdk/core/logs.py | 78 ++
 .../zhipuai/zhipuai_sdk/core/pagination.py | 62 ++
 .../zhipuai_sdk/types/assistant/__init__.py | 5 +
 .../types/assistant/assistant_completion.py | 40 +
 .../assistant_conversation_params.py | 7 +
 .../assistant/assistant_conversation_resp.py | 29 +
 .../assistant/assistant_create_params.py | 32 +
 .../types/assistant/assistant_support_resp.py | 21 +
 .../types/assistant/message/__init__.py | 3 +
 .../assistant/message/message_content.py | 13 +
 .../assistant/message/text_content_block.py | 14 +
 .../tools/code_interpreter_delta_block.py | 27 +
 .../message/tools/drawing_tool_delta_block.py | 21 +
 .../message/tools/function_delta_block.py | 22 +
 .../message/tools/retrieval_delta_black.py | 41 +
 .../assistant/message/tools/tools_type.py | 16 +
 .../message/tools/web_browser_delta_block.py | 48 ++
 .../assistant/message/tools_delta_block.py | 16 +
 .../zhipuai/zhipuai_sdk/types/batch.py | 82 ++
 .../zhipuai_sdk/types/batch_create_params.py | 37 +
 .../zhipuai/zhipuai_sdk/types/batch_error.py | 21 +
 .../zhipuai_sdk/types/batch_list_params.py | 20 +
 .../zhipuai_sdk/types/batch_request_counts.py | 14 +
 .../types/chat/async_chat_completion.py | 5 +-
 .../zhipuai_sdk/types/chat/chat_completion.py | 2 +-
 .../types/chat/chat_completion_chunk.py | 6 +-
 .../types/chat/code_geex/code_geex_params.py | 146 ++++
 .../zhipuai/zhipuai_sdk/types/embeddings.py | 3 +-
 .../zhipuai_sdk/types/files/__init__.py | 5 +
 .../types/files/file_create_params.py | 38 +
 .../zhipuai_sdk/types/files/file_deleted.py | 13 +
 .../types/{ => files}/file_object.py | 4 +-
 .../zhipuai_sdk/types/files/upload_detail.py | 13 +
 .../types/fine_tuning/fine_tuning_job.py | 2 +-
 .../fine_tuning/fine_tuning_job_event.py | 2 +-
 .../types/fine_tuning/models/__init__.py | 1 +
 .../fine_tuning/models/fine_tuned_models.py | 13 +
 .../zhipuai/zhipuai_sdk/types/image.py | 2 +-
 .../zhipuai_sdk/types/knowledge/__init__.py | 8 +
 .../types/knowledge/document/__init__.py | 8 +
 .../types/knowledge/document/document.py | 51 ++
 .../document/document_edit_params.py | 29 +
 .../document/document_list_params.py | 26 +
 .../knowledge/document/document_list_resp.py | 11 +
 .../zhipuai_sdk/types/knowledge/knowledge.py | 21 +
 .../knowledge/knowledge_create_params.py | 30 +
 .../types/knowledge/knowledge_list_params.py | 15 +
 .../types/knowledge/knowledge_list_resp.py | 11 +
 .../types/knowledge/knowledge_used.py | 21 +
 .../types/sensitive_word_check/__init__.py | 3 +
 .../sensitive_word_check.py | 14 +
.../zhipuai_sdk/types/tools/__init__.py | 9 + .../types/tools/tools_web_search_params.py | 35 + .../zhipuai_sdk/types/tools/web_search.py | 71 ++ .../types/tools/web_search_chunk.py | 33 + .../zhipuai_sdk/types/video/__init__.py | 3 + .../types/video/video_create_params.py | 27 + .../zhipuai_sdk/types/video/video_object.py | 30 + .../builtin/cogview/tools/cogview3.py | 48 +- .../builtin/cogview/tools/cogview3.yaml | 45 +- 108 files changed, 6513 insertions(+), 405 deletions(-) create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py rename api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/{ => jobs}/jobs.py (53%) create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py delete mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py create mode 100644 
api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py rename api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/{ => files}/file_object.py (86%) create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py create mode 100644 
api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py create mode 100644 api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py index bf9b093cb3..fc71d64714 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__init__.py @@ -1,7 +1,8 @@ from .__version__ import __version__ from ._client import ZhipuAI -from .core._errors import ( +from .core import ( APIAuthenticationError, + APIConnectionError, APIInternalError, APIReachLimitError, APIRequestFailedError, diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py index 659f38d7ff..51f8c49ecb 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/__version__.py @@ -1 +1 @@ -__version__ = "v2.0.1" +__version__ = "v2.1.0" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py index df9e506095..705d371e62 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/_client.py @@ -9,15 +9,13 @@ from httpx import Timeout from typing_extensions import override from . 
import api_resource -from .core import _jwt_token -from .core._base_type import NOT_GIVEN, NotGiven -from .core._errors import ZhipuAIError -from .core._http_client import ZHIPUAI_DEFAULT_MAX_RETRIES, HttpClient +from .core import NOT_GIVEN, ZHIPUAI_DEFAULT_MAX_RETRIES, HttpClient, NotGiven, ZhipuAIError, _jwt_token class ZhipuAI(HttpClient): - chat: api_resource.chat + chat: api_resource.chat.Chat api_key: str + _disable_token_cache: bool = True def __init__( self, @@ -28,10 +26,15 @@ class ZhipuAI(HttpClient): max_retries: int = ZHIPUAI_DEFAULT_MAX_RETRIES, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, + disable_token_cache: bool = True, + _strict_response_validation: bool = False, ) -> None: if api_key is None: - raise ZhipuAIError("No api_key provided, please provide it through parameters or environment variables") + api_key = os.environ.get("ZHIPUAI_API_KEY") + if api_key is None: + raise ZhipuAIError("未提供api_key,请通过参数或环境变量提供") self.api_key = api_key + self._disable_token_cache = disable_token_cache if base_url is None: base_url = os.environ.get("ZHIPUAI_BASE_URL") @@ -42,21 +45,31 @@ class ZhipuAI(HttpClient): super().__init__( version=__version__, base_url=base_url, + max_retries=max_retries, timeout=timeout, custom_httpx_client=http_client, custom_headers=custom_headers, + _strict_response_validation=_strict_response_validation, ) self.chat = api_resource.chat.Chat(self) self.images = api_resource.images.Images(self) self.embeddings = api_resource.embeddings.Embeddings(self) self.files = api_resource.files.Files(self) self.fine_tuning = api_resource.fine_tuning.FineTuning(self) + self.batches = api_resource.Batches(self) + self.knowledge = api_resource.Knowledge(self) + self.tools = api_resource.Tools(self) + self.videos = api_resource.Videos(self) + self.assistant = api_resource.Assistant(self) @property @override - def _auth_headers(self) -> dict[str, str]: + def auth_headers(self) -> dict[str, str]: api_key = self.api_key - return {"Authorization": f"{_jwt_token.generate_token(api_key)}"} + if self._disable_token_cache: + return {"Authorization": f"Bearer {api_key}"} + else: + return {"Authorization": f"Bearer {_jwt_token.generate_token(api_key)}"} def __del__(self) -> None: if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close") or not hasattr(self, "_client"): diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py index 0a90e21e48..4fe0719dde 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/__init__.py @@ -1,5 +1,34 @@ -from .chat import chat +from .assistant import ( + Assistant, +) +from .batches import Batches +from .chat import ( + AsyncCompletions, + Chat, + Completions, +) from .embeddings import Embeddings -from .files import Files -from .fine_tuning import fine_tuning +from .files import Files, FilesWithRawResponse +from .fine_tuning import FineTuning from .images import Images +from .knowledge import Knowledge +from .tools import Tools +from .videos import ( + Videos, +) + +__all__ = [ + "Videos", + "AsyncCompletions", + "Chat", + "Completions", + "Images", + "Embeddings", + "Files", + "FilesWithRawResponse", + "FineTuning", + "Batches", + "Knowledge", + "Tools", + "Assistant", +] diff --git 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py new file mode 100644 index 0000000000..ce619aa7f0 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/__init__.py @@ -0,0 +1,3 @@ +from .assistant import Assistant + +__all__ = ["Assistant"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py new file mode 100644 index 0000000000..f772340a82 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/assistant/assistant.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + StreamResponse, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.assistant import AssistantCompletion +from ...types.assistant.assistant_conversation_resp import ConversationUsageListResp +from ...types.assistant.assistant_support_resp import AssistantSupportResp + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +from ...types.assistant import assistant_conversation_params, assistant_create_params + +__all__ = ["Assistant"] + + +class Assistant(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def conversation( + self, + assistant_id: str, + model: str, + messages: list[assistant_create_params.ConversationMessage], + *, + stream: bool = True, + conversation_id: Optional[str] = None, + attachments: Optional[list[assistant_create_params.AssistantAttachments]] = None, + metadata: dict | None = None, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> StreamResponse[AssistantCompletion]: + body = deepcopy_minimal( + { + "assistant_id": assistant_id, + "model": model, + "messages": messages, + "stream": stream, + "conversation_id": conversation_id, + "attachments": attachments, + "metadata": metadata, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/assistant", + body=maybe_transform(body, assistant_create_params.AssistantParameters), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=AssistantCompletion, + stream=stream or True, + stream_cls=StreamResponse[AssistantCompletion], + ) + + def query_support( + self, + *, + assistant_id_list: list[str] = None, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AssistantSupportResp: + body = deepcopy_minimal( + { + "assistant_id_list": assistant_id_list, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/assistant/list", + body=body, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=AssistantSupportResp, + ) + + def query_conversation_usage( + self, + assistant_id: str, + page: int = 1, + page_size: int = 10, + *, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None 
= None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ConversationUsageListResp: + body = deepcopy_minimal( + { + "assistant_id": assistant_id, + "page": page, + "page_size": page_size, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/assistant/conversation/list", + body=maybe_transform(body, assistant_conversation_params.ConversationParameters), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=ConversationUsageListResp, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py new file mode 100644 index 0000000000..ae2f2be85e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/batches.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, Optional + +import httpx + +from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options, maybe_transform +from ..core.pagination import SyncCursorPage +from ..types import batch_create_params, batch_list_params +from ..types.batch import Batch + +if TYPE_CHECKING: + from .._client import ZhipuAI + + +class Batches(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def create( + self, + *, + completion_window: str | None = None, + endpoint: Literal["/v1/chat/completions", "/v1/embeddings"], + input_file_id: str, + metadata: Optional[dict[str, str]] | NotGiven = NOT_GIVEN, + auto_delete_input_file: bool = True, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + return self._post( + "/batches", + body=maybe_transform( + { + "completion_window": completion_window, + "endpoint": endpoint, + "input_file_id": input_file_id, + "metadata": metadata, + "auto_delete_input_file": auto_delete_input_file, + }, + batch_create_params.BatchCreateParams, + ), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Batch, + ) + + def retrieve( + self, + batch_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Retrieves a batch. + + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._get( + f"/batches/{batch_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Batch, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Batch]: + """List your organization's batches. + + Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. 
+ + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/batches", + page=SyncCursorPage[Batch], + options=make_request_options( + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + }, + batch_list_params.BatchListParams, + ), + ), + model=Batch, + ) + + def cancel( + self, + batch_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Batch: + """ + Cancels an in-progress batch. + + Args: + batch_id: The ID of the batch to cancel. + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + + """ + if not batch_id: + raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}") + return self._post( + f"/batches/{batch_id}/cancel", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Batch, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py index e69de29bb2..5cd8dc6f33 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/__init__.py @@ -0,0 +1,5 @@ +from .async_completions import AsyncCompletions +from .chat import Chat +from .completions import Completions + +__all__ = ["AsyncCompletions", "Chat", "Completions"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py index 1f80119739..d8ecc31064 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/async_completions.py @@ -1,13 +1,25 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING, Literal, Optional, Union import httpx -from ...core._base_api import BaseAPI -from ...core._base_type import NOT_GIVEN, Headers, NotGiven -from ...core._http_client import make_user_request_input +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + drop_prefix_image_data, + make_request_options, + maybe_transform, +) from ...types.chat.async_chat_completion import AsyncCompletion, AsyncTaskStatus +from ...types.chat.code_geex import code_geex_params +from ...types.sensitive_word_check import SensitiveWordCheckRequest + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from ..._client import ZhipuAI @@ -22,6 +34,7 @@ class AsyncCompletions(BaseAPI): *, model: str, request_id: Optional[str] | NotGiven = NOT_GIVEN, + user_id: Optional[str] | NotGiven = NOT_GIVEN, do_sample: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, @@ -29,50 +42,74 @@ 
class AsyncCompletions(BaseAPI): seed: int | NotGiven = NOT_GIVEN, messages: Union[str, list[str], list[int], list[list[int]], None], stop: Optional[Union[str, list[str], None]] | NotGiven = NOT_GIVEN, - sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, tools: Optional[object] | NotGiven = NOT_GIVEN, tool_choice: str | NotGiven = NOT_GIVEN, + meta: Optional[dict[str, str]] | NotGiven = NOT_GIVEN, + extra: Optional[code_geex_params.CodeGeexExtra] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, - disable_strict_validation: Optional[bool] | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> AsyncTaskStatus: _cast_type = AsyncTaskStatus + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if temperature is not None and temperature != NOT_GIVEN: + if temperature <= 0: + do_sample = False + temperature = 0.01 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501 + if temperature >= 1: + temperature = 0.99 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间") + if top_p is not None and top_p != NOT_GIVEN: + if top_p >= 1: + top_p = 0.99 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") + if top_p <= 0: + top_p = 0.01 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") - if disable_strict_validation: - _cast_type = object + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if isinstance(messages, list): + for item in messages: + if item.get("content"): + item["content"] = drop_prefix_image_data(item["content"]) + + body = { + "model": model, + "request_id": request_id, + "user_id": user_id, + "temperature": temperature, + "top_p": top_p, + "do_sample": do_sample, + "max_tokens": max_tokens, + "seed": seed, + "messages": messages, + "stop": stop, + "sensitive_word_check": sensitive_word_check, + "tools": tools, + "tool_choice": tool_choice, + "meta": meta, + "extra": maybe_transform(extra, code_geex_params.CodeGeexExtra), + } return self._post( "/async/chat/completions", - body={ - "model": model, - "request_id": request_id, - "temperature": temperature, - "top_p": top_p, - "do_sample": do_sample, - "max_tokens": max_tokens, - "seed": seed, - "messages": messages, - "stop": stop, - "sensitive_word_check": sensitive_word_check, - "tools": tools, - "tool_choice": tool_choice, - }, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + body=body, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=_cast_type, - enable_stream=False, + stream=False, ) def retrieve_completion_result( self, id: str, extra_headers: Headers | None = None, - disable_strict_validation: Optional[bool] | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Union[AsyncCompletion, AsyncTaskStatus]: _cast_type = Union[AsyncCompletion, AsyncTaskStatus] - if disable_strict_validation: - _cast_type = object return self._get( path=f"/async-result/{id}", cast_type=_cast_type, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py index 92362fc50a..b3cc46566c 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/chat.py @@ -1,17 +1,18 @@ from typing import TYPE_CHECKING -from ...core._base_api import BaseAPI +from ...core import BaseAPI, cached_property from .async_completions import AsyncCompletions from .completions import Completions if TYPE_CHECKING: - from ..._client import ZhipuAI + pass class Chat(BaseAPI): - completions: Completions + @cached_property + def completions(self) -> Completions: + return Completions(self._client) - def __init__(self, client: "ZhipuAI") -> None: - super().__init__(client) - self.completions = Completions(client) - self.asyncCompletions = AsyncCompletions(client) + @cached_property + def asyncCompletions(self) -> AsyncCompletions: # noqa: N802 + return AsyncCompletions(self._client) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py index ec29f33864..1c23473a03 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/chat/completions.py @@ -1,15 +1,28 @@ from __future__ import annotations +import logging from typing import TYPE_CHECKING, Literal, Optional, Union import httpx -from ...core._base_api import BaseAPI -from ...core._base_type import NOT_GIVEN, Headers, NotGiven -from ...core._http_client import make_user_request_input -from ...core._sse_client import StreamResponse +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + StreamResponse, + deepcopy_minimal, + drop_prefix_image_data, + make_request_options, + maybe_transform, +) from ...types.chat.chat_completion import Completion from ...types.chat.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.code_geex import code_geex_params +from ...types.sensitive_word_check import SensitiveWordCheckRequest + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from ..._client import ZhipuAI @@ -24,6 +37,7 @@ class Completions(BaseAPI): *, model: str, request_id: Optional[str] | NotGiven = NOT_GIVEN, + user_id: Optional[str] | NotGiven = NOT_GIVEN, do_sample: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -32,23 +46,43 @@ class Completions(BaseAPI): seed: int | NotGiven = NOT_GIVEN, messages: Union[str, list[str], list[int], object, None], stop: Optional[Union[str, list[str], None]] | NotGiven = NOT_GIVEN, - sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, tools: Optional[object] | NotGiven = NOT_GIVEN, tool_choice: str | NotGiven = NOT_GIVEN, + meta: Optional[dict[str, str]] | NotGiven = NOT_GIVEN, + extra: Optional[code_geex_params.CodeGeexExtra] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, - disable_strict_validation: Optional[bool] | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Completion | StreamResponse[ChatCompletionChunk]: - _cast_type = Completion - _stream_cls = 
StreamResponse[ChatCompletionChunk] - if disable_strict_validation: - _cast_type = object - _stream_cls = StreamResponse[object] - return self._post( - "/chat/completions", - body={ + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if temperature is not None and temperature != NOT_GIVEN: + if temperature <= 0: + do_sample = False + temperature = 0.01 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间,do_sample重写为:false(参数top_p temperture不生效)") # noqa: E501 + if temperature >= 1: + temperature = 0.99 + # logger.warning("temperature:取值范围是:(0.0, 1.0) 开区间") + if top_p is not None and top_p != NOT_GIVEN: + if top_p >= 1: + top_p = 0.99 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") + if top_p <= 0: + top_p = 0.01 + # logger.warning("top_p:取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1") + + logger.debug(f"temperature:{temperature}, top_p:{top_p}") + if isinstance(messages, list): + for item in messages: + if item.get("content"): + item["content"] = drop_prefix_image_data(item["content"]) + + body = deepcopy_minimal( + { "model": model, "request_id": request_id, + "user_id": user_id, "temperature": temperature, "top_p": top_p, "do_sample": do_sample, @@ -60,11 +94,15 @@ class Completions(BaseAPI): "stream": stream, "tools": tools, "tool_choice": tool_choice, - }, - options=make_user_request_input( - extra_headers=extra_headers, - ), - cast_type=_cast_type, - enable_stream=stream or False, - stream_cls=_stream_cls, + "meta": meta, + "extra": maybe_transform(extra, code_geex_params.CodeGeexExtra), + } + ) + return self._post( + "/chat/completions", + body=body, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=Completion, + stream=stream or False, + stream_cls=StreamResponse[ChatCompletionChunk], ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py index 2308a20451..4b4baef942 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/embeddings.py @@ -4,9 +4,7 @@ from typing import TYPE_CHECKING, Optional, Union import httpx -from ..core._base_api import BaseAPI -from ..core._base_type import NOT_GIVEN, Headers, NotGiven -from ..core._http_client import make_user_request_input +from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options from ..types.embeddings import EmbeddingsResponded if TYPE_CHECKING: @@ -22,10 +20,13 @@ class Embeddings(BaseAPI): *, input: Union[str, list[str], list[int], list[list[int]]], model: Union[str], + dimensions: Union[int] | NotGiven = NOT_GIVEN, encoding_format: str | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, + request_id: Optional[str] | NotGiven = NOT_GIVEN, sensitive_word_check: Optional[object] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, disable_strict_validation: Optional[bool] | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> EmbeddingsResponded: @@ -37,11 +38,13 @@ class Embeddings(BaseAPI): body={ "input": input, "model": model, + "dimensions": dimensions, "encoding_format": encoding_format, "user": user, + "request_id": request_id, "sensitive_word_check": sensitive_word_check, }, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + 
options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=_cast_type, - enable_stream=False, + stream=False, ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py index f2ac74bffa..ba9de75b7e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/files.py @@ -1,19 +1,30 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from collections.abc import Mapping +from typing import TYPE_CHECKING, Literal, cast import httpx -from ..core._base_api import BaseAPI -from ..core._base_type import NOT_GIVEN, FileTypes, Headers, NotGiven -from ..core._files import is_file_content -from ..core._http_client import make_user_request_input -from ..types.file_object import FileObject, ListOfFileObject +from ..core import ( + NOT_GIVEN, + BaseAPI, + Body, + FileTypes, + Headers, + NotGiven, + _legacy_binary_response, + _legacy_response, + deepcopy_minimal, + extract_files, + make_request_options, + maybe_transform, +) +from ..types.files import FileDeleted, FileObject, ListOfFileObject, UploadDetail, file_create_params if TYPE_CHECKING: from .._client import ZhipuAI -__all__ = ["Files"] +__all__ = ["Files", "FilesWithRawResponse"] class Files(BaseAPI): @@ -23,30 +34,69 @@ class Files(BaseAPI): def create( self, *, - file: FileTypes, - purpose: str, + file: FileTypes = None, + upload_detail: list[UploadDetail] = None, + purpose: Literal["fine-tune", "retrieval", "batch"], + knowledge_id: str = None, + sentence_size: int = None, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FileObject: - if not is_file_content(file): - prefix = f"Expected file input `{file!r}`" - raise RuntimeError( - f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(file)} instead." - ) from None - files = [("file", file)] - - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - + if not file and not upload_detail: + raise ValueError("At least one of `file` and `upload_detail` must be provided.") + body = deepcopy_minimal( + { + "file": file, + "upload_detail": upload_detail, + "purpose": purpose, + "knowledge_id": knowledge_id, + "sentence_size": sentence_size, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/files", - body={ - "purpose": purpose, - }, + body=maybe_transform(body, file_create_params.FileCreateParams), files=files, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=FileObject, ) + # def retrieve( + # self, + # file_id: str, + # *, + # extra_headers: Headers | None = None, + # extra_body: Body | None = None, + # timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + # ) -> FileObject: + # """ + # Returns information about a specific file. 
+ # + # Args: + # file_id: The ID of the file to retrieve information about + # extra_headers: Send extra headers + # + # extra_body: Add additional JSON properties to the request + # + # timeout: Override the client-level default timeout for this request, in seconds + # """ + # if not file_id: + # raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + # return self._get( + # f"/files/{file_id}", + # options=make_request_options( + # extra_headers=extra_headers, extra_body=extra_body, timeout=timeout + # ), + # cast_type=FileObject, + # ) + def list( self, *, @@ -55,13 +105,15 @@ class Files(BaseAPI): after: str | NotGiven = NOT_GIVEN, order: str | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ListOfFileObject: return self._get( "/files", cast_type=ListOfFileObject, - options=make_user_request_input( + options=make_request_options( extra_headers=extra_headers, + extra_body=extra_body, timeout=timeout, query={ "purpose": purpose, @@ -71,3 +123,72 @@ class Files(BaseAPI): }, ), ) + + def delete( + self, + file_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FileDeleted: + """ + Delete a file. + + Args: + file_id: The ID of the file to delete + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + return self._delete( + f"/files/{file_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FileDeleted, + ) + + def content( + self, + file_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> _legacy_response.HttpxBinaryResponseContent: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"Accept": "application/binary", **(extra_headers or {})} + return self._get( + f"/files/{file_id}/content", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=_legacy_binary_response.HttpxBinaryResponseContent, + ) + + +class FilesWithRawResponse: + def __init__(self, files: Files) -> None: + self._files = files + + self.create = _legacy_response.to_raw_response_wrapper( + files.create, + ) + self.list = _legacy_response.to_raw_response_wrapper( + files.list, + ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py index e69de29bb2..7c309b8341 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/__init__.py @@ -0,0 +1,5 @@ +from .fine_tuning import FineTuning +from .jobs import Jobs +from .models import FineTunedModels + +__all__ = ["Jobs", "FineTunedModels", "FineTuning"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py index dc30bd33ed..8670f7de00 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/fine_tuning.py @@ -1,15 +1,18 @@ from typing import TYPE_CHECKING -from ...core._base_api import BaseAPI +from ...core import BaseAPI, cached_property from .jobs import Jobs +from .models import FineTunedModels if TYPE_CHECKING: - from ..._client import ZhipuAI + pass class FineTuning(BaseAPI): - jobs: Jobs + @cached_property + def jobs(self) -> Jobs: + return Jobs(self._client) - def __init__(self, client: "ZhipuAI") -> None: - super().__init__(client) - self.jobs = Jobs(client) + @cached_property + def models(self) -> FineTunedModels: + return FineTunedModels(self._client) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py new file mode 100644 index 0000000000..40777a153f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/__init__.py @@ -0,0 +1,3 @@ +from .jobs import Jobs + +__all__ = ["Jobs"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py similarity index 53% rename from api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py rename to api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py index 3d2e9208a1..8b038cadc0 100644 --- 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/jobs/jobs.py @@ -4,13 +4,23 @@ from typing import TYPE_CHECKING, Optional import httpx -from ...core._base_api import BaseAPI -from ...core._base_type import NOT_GIVEN, Headers, NotGiven -from ...core._http_client import make_user_request_input -from ...types.fine_tuning import FineTuningJob, FineTuningJobEvent, ListOfFineTuningJob, job_create_params +from ....core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + make_request_options, +) +from ....types.fine_tuning import ( + FineTuningJob, + FineTuningJobEvent, + ListOfFineTuningJob, + job_create_params, +) if TYPE_CHECKING: - from ..._client import ZhipuAI + from ...._client import ZhipuAI __all__ = ["Jobs"] @@ -29,6 +39,7 @@ class Jobs(BaseAPI): request_id: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJob: return self._post( @@ -41,7 +52,7 @@ class Jobs(BaseAPI): "validation_file": validation_file, "request_id": request_id, }, - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=FineTuningJob, ) @@ -50,11 +61,12 @@ class Jobs(BaseAPI): fine_tuning_job_id: str, *, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJob: return self._get( f"/fine_tuning/jobs/{fine_tuning_job_id}", - options=make_user_request_input(extra_headers=extra_headers, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=FineTuningJob, ) @@ -64,13 +76,15 @@ class Jobs(BaseAPI): after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ListOfFineTuningJob: return self._get( "/fine_tuning/jobs", cast_type=ListOfFineTuningJob, - options=make_user_request_input( + options=make_request_options( extra_headers=extra_headers, + extra_body=extra_body, timeout=timeout, query={ "after": after, @@ -79,6 +93,24 @@ class Jobs(BaseAPI): ), ) + def cancel( + self, + fine_tuning_job_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # noqa: E501 + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._post( + f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FineTuningJob, + ) + def list_events( self, fine_tuning_job_id: str, @@ -86,13 +118,15 @@ class Jobs(BaseAPI): after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, + extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> FineTuningJobEvent: return self._get( f"/fine_tuning/jobs/{fine_tuning_job_id}/events", cast_type=FineTuningJobEvent, - options=make_user_request_input( + options=make_request_options( extra_headers=extra_headers, + extra_body=extra_body, timeout=timeout, query={ "after": after, @@ -100,3 +134,19 @@ class Jobs(BaseAPI): }, ), ) + + def delete( + self, + fine_tuning_job_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTuningJob: + if not fine_tuning_job_id: + raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}") + return self._delete( + f"/fine_tuning/jobs/{fine_tuning_job_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FineTuningJob, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py new file mode 100644 index 0000000000..d832635baf --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/__init__.py @@ -0,0 +1,3 @@ +from .fine_tuned_models import FineTunedModels + +__all__ = ["FineTunedModels"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py new file mode 100644 index 0000000000..29c023e3b1 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/fine_tuning/models/fine_tuned_models.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import httpx + +from ....core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + make_request_options, +) +from ....types.fine_tuning.models import FineTunedModelsStatus + +if TYPE_CHECKING: + from ...._client import ZhipuAI + +__all__ = ["FineTunedModels"] + + +class FineTunedModels(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def delete( + self, + fine_tuned_model: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> FineTunedModelsStatus: + if not fine_tuned_model: + raise ValueError(f"Expected a non-empty value for `fine_tuned_model` but received {fine_tuned_model!r}") + return self._delete( + f"fine_tuning/fine_tuned_models/{fine_tuned_model}", + 
options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=FineTunedModelsStatus, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py index 2692b093af..8ad411913f 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/images.py @@ -4,10 +4,9 @@ from typing import TYPE_CHECKING, Optional import httpx -from ..core._base_api import BaseAPI -from ..core._base_type import NOT_GIVEN, Body, Headers, NotGiven -from ..core._http_client import make_user_request_input +from ..core import NOT_GIVEN, BaseAPI, Body, Headers, NotGiven, make_request_options from ..types.image import ImagesResponded +from ..types.sensitive_word_check import SensitiveWordCheckRequest if TYPE_CHECKING: from .._client import ZhipuAI @@ -27,8 +26,10 @@ class Images(BaseAPI): response_format: Optional[str] | NotGiven = NOT_GIVEN, size: Optional[str] | NotGiven = NOT_GIVEN, style: Optional[str] | NotGiven = NOT_GIVEN, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, request_id: Optional[str] | NotGiven = NOT_GIVEN, + user_id: Optional[str] | NotGiven = NOT_GIVEN, extra_headers: Headers | None = None, extra_body: Body | None = None, disable_strict_validation: Optional[bool] | None = None, @@ -45,12 +46,14 @@ class Images(BaseAPI): "n": n, "quality": quality, "response_format": response_format, + "sensitive_word_check": sensitive_word_check, "size": size, "style": style, "user": user, + "user_id": user_id, "request_id": request_id, }, - options=make_user_request_input(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=_cast_type, - enable_stream=False, + stream=False, ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py new file mode 100644 index 0000000000..5a67d743c3 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/__init__.py @@ -0,0 +1,3 @@ +from .knowledge import Knowledge + +__all__ = ["Knowledge"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py new file mode 100644 index 0000000000..fd289e2232 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/__init__.py @@ -0,0 +1,3 @@ +from .document import Document + +__all__ = ["Document"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py new file mode 100644 index 0000000000..2c4066d893 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/document/document.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import TYPE_CHECKING, Literal, Optional, cast + +import httpx + +from ....core import ( 
+ NOT_GIVEN, + BaseAPI, + Body, + FileTypes, + Headers, + NotGiven, + deepcopy_minimal, + extract_files, + make_request_options, + maybe_transform, +) +from ....types.files import UploadDetail, file_create_params +from ....types.knowledge.document import DocumentData, DocumentObject, document_edit_params, document_list_params +from ....types.knowledge.document.document_list_resp import DocumentPage + +if TYPE_CHECKING: + from ...._client import ZhipuAI + +__all__ = ["Document"] + + +class Document(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def create( + self, + *, + file: FileTypes = None, + custom_separator: Optional[list[str]] = None, + upload_detail: list[UploadDetail] = None, + purpose: Literal["retrieval"], + knowledge_id: str = None, + sentence_size: int = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> DocumentObject: + if not file and not upload_detail: + raise ValueError("At least one of `file` and `upload_detail` must be provided.") + body = deepcopy_minimal( + { + "file": file, + "upload_detail": upload_detail, + "purpose": purpose, + "custom_separator": custom_separator, + "knowledge_id": knowledge_id, + "sentence_size": sentence_size, + } + ) + files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) + if files: + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + "/files", + body=maybe_transform(body, file_create_params.FileCreateParams), + files=files, + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=DocumentObject, + ) + + def edit( + self, + document_id: str, + knowledge_type: str, + *, + custom_separator: Optional[list[str]] = None, + sentence_size: Optional[int] = None, + callback_url: Optional[str] = None, + callback_header: Optional[dict[str, str]] = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """ + + Args: + document_id: 知识id + knowledge_type: 知识类型: + 1:文章知识: 支持pdf,url,docx + 2.问答知识-文档: 支持pdf,url,docx + 3.问答知识-表格: 支持xlsx + 4.商品库-表格: 支持xlsx + 5.自定义: 支持pdf,url,docx + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + :param knowledge_type: + :param document_id: + :param timeout: + :param extra_body: + :param callback_header: + :param sentence_size: + :param extra_headers: + :param callback_url: + :param custom_separator: + """ + if not document_id: + raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}") + + body = deepcopy_minimal( + { + "id": document_id, + "knowledge_type": knowledge_type, + "custom_separator": custom_separator, + "sentence_size": sentence_size, + "callback_url": callback_url, + "callback_header": callback_header, + } + ) + + return self._put( + f"/document/{document_id}", + body=maybe_transform(body, document_edit_params.DocumentEditParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def list( + self, + 
knowledge_id: str, + *, + purpose: str | NotGiven = NOT_GIVEN, + page: str | NotGiven = NOT_GIVEN, + limit: str | NotGiven = NOT_GIVEN, + order: Literal["desc", "asc"] | NotGiven = NOT_GIVEN, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> DocumentPage: + return self._get( + "/files", + options=make_request_options( + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "knowledge_id": knowledge_id, + "purpose": purpose, + "page": page, + "limit": limit, + "order": order, + }, + document_list_params.DocumentListParams, + ), + ), + cast_type=DocumentPage, + ) + + def delete( + self, + document_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """ + Delete a file. + + Args: + + document_id: 知识id + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not document_id: + raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}") + + return self._delete( + f"/document/{document_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def retrieve( + self, + document_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> DocumentData: + """ + + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not document_id: + raise ValueError(f"Expected a non-empty value for `document_id` but received {document_id!r}") + + return self._get( + f"/document/{document_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=DocumentData, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py new file mode 100644 index 0000000000..fea4c73ac9 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/knowledge/knowledge.py @@ -0,0 +1,173 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, Optional + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + cached_property, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.knowledge import KnowledgeInfo, KnowledgeUsed, knowledge_create_params, knowledge_list_params +from ...types.knowledge.knowledge_list_resp import KnowledgePage +from .document import Document + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +__all__ = ["Knowledge"] + + +class Knowledge(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + @cached_property + def document(self) -> Document: + return Document(self._client) + + def create( + self, + embedding_id: int, + name: str, + *, + customer_identifier: Optional[str] = None, + description: Optional[str] = None, + background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = 
None, + icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None, + bucket_id: Optional[str] = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> KnowledgeInfo: + body = deepcopy_minimal( + { + "embedding_id": embedding_id, + "name": name, + "customer_identifier": customer_identifier, + "description": description, + "background": background, + "icon": icon, + "bucket_id": bucket_id, + } + ) + return self._post( + "/knowledge", + body=maybe_transform(body, knowledge_create_params.KnowledgeBaseParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=KnowledgeInfo, + ) + + def modify( + self, + knowledge_id: str, + embedding_id: int, + *, + name: str, + description: Optional[str] = None, + background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None, + icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + body = deepcopy_minimal( + { + "id": knowledge_id, + "embedding_id": embedding_id, + "name": name, + "description": description, + "background": background, + "icon": icon, + } + ) + return self._put( + f"/knowledge/{knowledge_id}", + body=maybe_transform(body, knowledge_create_params.KnowledgeBaseParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def query( + self, + *, + page: int | NotGiven = 1, + size: int | NotGiven = 10, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> KnowledgePage: + return self._get( + "/knowledge", + options=make_request_options( + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "page": page, + "size": size, + }, + knowledge_list_params.KnowledgeListParams, + ), + ), + cast_type=KnowledgePage, + ) + + def delete( + self, + knowledge_id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """ + Delete a file. + + Args: + knowledge_id: 知识库ID + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not knowledge_id: + raise ValueError("Expected a non-empty value for `knowledge_id`") + + return self._delete( + f"/knowledge/{knowledge_id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=httpx.Response, + ) + + def used( + self, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> KnowledgeUsed: + """ + Returns the contents of the specified file. 
+ + Args: + extra_headers: Send extra headers + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/knowledge/capacity", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=KnowledgeUsed, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py new file mode 100644 index 0000000000..43e4e37da1 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/__init__.py @@ -0,0 +1,3 @@ +from .tools import Tools + +__all__ = ["Tools"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py new file mode 100644 index 0000000000..3c3a630aff --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/tools/tools.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Literal, Optional, Union + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + StreamResponse, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.tools import WebSearch, WebSearchChunk, tools_web_search_params + +logger = logging.getLogger(__name__) + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +__all__ = ["Tools"] + + +class Tools(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def web_search( + self, + *, + model: str, + request_id: Optional[str] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + messages: Union[str, list[str], list[int], object, None], + scope: Optional[str] | NotGiven = NOT_GIVEN, + location: Optional[str] | NotGiven = NOT_GIVEN, + recent_days: Optional[int] | NotGiven = NOT_GIVEN, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> WebSearch | StreamResponse[WebSearchChunk]: + body = deepcopy_minimal( + { + "model": model, + "request_id": request_id, + "messages": messages, + "stream": stream, + "scope": scope, + "location": location, + "recent_days": recent_days, + } + ) + return self._post( + "/tools", + body=maybe_transform(body, tools_web_search_params.WebSearchParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=WebSearch, + stream=stream or False, + stream_cls=StreamResponse[WebSearchChunk], + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py new file mode 100644 index 0000000000..6b0f99ed09 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/__init__.py @@ -0,0 +1,7 @@ +from .videos import ( + Videos, +) + +__all__ = [ + "Videos", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py new file mode 100644 index 0000000000..f1f1c08036 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/api_resource/videos/videos.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional + +import httpx + +from ...core import ( + NOT_GIVEN, + BaseAPI, + Body, + Headers, + NotGiven, + deepcopy_minimal, + make_request_options, + maybe_transform, +) +from ...types.sensitive_word_check import SensitiveWordCheckRequest +from ...types.video import VideoObject, video_create_params + +if TYPE_CHECKING: + from ..._client import ZhipuAI + +__all__ = ["Videos"] + + +class Videos(BaseAPI): + def __init__(self, client: ZhipuAI) -> None: + super().__init__(client) + + def generations( + self, + model: str, + *, + prompt: str = None, + image_url: str = None, + sensitive_word_check: Optional[SensitiveWordCheckRequest] | NotGiven = NOT_GIVEN, + request_id: str = None, + user_id: str = None, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VideoObject: + if not model and not model: + raise ValueError("At least one of `model` and `prompt` must be provided.") + body = deepcopy_minimal( + { + "model": model, + "prompt": prompt, + "image_url": image_url, + "sensitive_word_check": sensitive_word_check, + "request_id": request_id, + "user_id": user_id, + } + ) + return self._post( + "/videos/generations", + body=maybe_transform(body, video_create_params.VideoCreateParams), + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=VideoObject, + ) + + def retrieve_videos_result( + self, + id: str, + *, + extra_headers: Headers | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VideoObject: + if not id: + raise ValueError("At least one of `id` must be provided.") + + return self._get( + f"/async-result/{id}", + options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), + cast_type=VideoObject, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py index e69de29bb2..3d6466d279 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/__init__.py @@ -0,0 +1,108 @@ +from ._base_api import BaseAPI +from ._base_compat import ( + PYDANTIC_V2, + ConfigDict, + GenericModel, + cached_property, + field_get_default, + get_args, + get_model_config, + get_model_fields, + get_origin, + is_literal_type, + is_union, + parse_obj, +) +from ._base_models import BaseModel, construct_type +from ._base_type import ( + NOT_GIVEN, + Body, + FileTypes, + Headers, + IncEx, + ModelT, + NotGiven, + Query, +) +from ._constants import ( + ZHIPUAI_DEFAULT_LIMITS, + ZHIPUAI_DEFAULT_MAX_RETRIES, + ZHIPUAI_DEFAULT_TIMEOUT, +) +from ._errors import ( + APIAuthenticationError, + APIConnectionError, + APIInternalError, + APIReachLimitError, + APIRequestFailedError, + APIResponseError, + APIResponseValidationError, + APIServerFlowExceedError, + APIStatusError, + APITimeoutError, + ZhipuAIError, +) +from ._files import is_file_content +from ._http_client import HttpClient, make_request_options +from ._sse_client import StreamResponse +from ._utils import ( + deepcopy_minimal, + drop_prefix_image_data, + extract_files, + is_given, + is_list, + is_mapping, + maybe_transform, + 
parse_date, + parse_datetime, +) + +__all__ = [ + "BaseModel", + "construct_type", + "BaseAPI", + "NOT_GIVEN", + "Headers", + "NotGiven", + "Body", + "IncEx", + "ModelT", + "Query", + "FileTypes", + "PYDANTIC_V2", + "ConfigDict", + "GenericModel", + "get_args", + "is_union", + "parse_obj", + "get_origin", + "is_literal_type", + "get_model_config", + "get_model_fields", + "field_get_default", + "is_file_content", + "ZhipuAIError", + "APIStatusError", + "APIRequestFailedError", + "APIAuthenticationError", + "APIReachLimitError", + "APIInternalError", + "APIServerFlowExceedError", + "APIResponseError", + "APIResponseValidationError", + "APITimeoutError", + "make_request_options", + "HttpClient", + "ZHIPUAI_DEFAULT_TIMEOUT", + "ZHIPUAI_DEFAULT_MAX_RETRIES", + "ZHIPUAI_DEFAULT_LIMITS", + "is_list", + "is_mapping", + "parse_date", + "parse_datetime", + "is_given", + "maybe_transform", + "deepcopy_minimal", + "extract_files", + "StreamResponse", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py index 10b46ff8e3..3592ea6bac 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_api.py @@ -16,3 +16,4 @@ class BaseAPI: self._post = client.post self._put = client.put self._patch = client.patch + self._get_api_list = client.get_api_list diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py new file mode 100644 index 0000000000..92a5d683be --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_compat.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +from collections.abc import Callable +from datetime import date, datetime +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, overload + +import pydantic +from pydantic.fields import FieldInfo +from typing_extensions import Self + +from ._base_type import StrBytesIntFloat + +_T = TypeVar("_T") +_ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel) + +# --------------- Pydantic v2 compatibility --------------- + +# Pyright incorrectly reports some of our functions as overriding a method when they don't +# pyright: reportIncompatibleMethodOverride=false + +PYDANTIC_V2 = pydantic.VERSION.startswith("2.") + +# v1 re-exports +if TYPE_CHECKING: + + def parse_date(value: date | StrBytesIntFloat) -> date: ... + + def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime: ... + + def get_args(t: type[Any]) -> tuple[Any, ...]: ... + + def is_union(tp: type[Any] | None) -> bool: ... + + def get_origin(t: type[Any]) -> type[Any] | None: ... + + def is_literal_type(type_: type[Any]) -> bool: ... + + def is_typeddict(type_: type[Any]) -> bool: ... 
+ +else: + if PYDANTIC_V2: + from pydantic.v1.typing import ( # noqa: I001 + get_args as get_args, # noqa: PLC0414 + is_union as is_union, # noqa: PLC0414 + get_origin as get_origin, # noqa: PLC0414 + is_typeddict as is_typeddict, # noqa: PLC0414 + is_literal_type as is_literal_type, # noqa: PLC0414 + ) + from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # noqa: PLC0414 + else: + from pydantic.typing import ( # noqa: I001 + get_args as get_args, # noqa: PLC0414 + is_union as is_union, # noqa: PLC0414 + get_origin as get_origin, # noqa: PLC0414 + is_typeddict as is_typeddict, # noqa: PLC0414 + is_literal_type as is_literal_type, # noqa: PLC0414 + ) + from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime # noqa: PLC0414 + + +# refactored config +if TYPE_CHECKING: + from pydantic import ConfigDict +else: + if PYDANTIC_V2: + from pydantic import ConfigDict + else: + # TODO: provide an error message here? + ConfigDict = None + + +# renamed methods / properties +def parse_obj(model: type[_ModelT], value: object) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate(value) + else: + # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + return cast(_ModelT, model.parse_obj(value)) + + +def field_is_required(field: FieldInfo) -> bool: + if PYDANTIC_V2: + return field.is_required() + return field.required # type: ignore + + +def field_get_default(field: FieldInfo) -> Any: + value = field.get_default() + if PYDANTIC_V2: + from pydantic_core import PydanticUndefined + + if value == PydanticUndefined: + return None + return value + return value + + +def field_outer_type(field: FieldInfo) -> Any: + if PYDANTIC_V2: + return field.annotation + return field.outer_type_ # type: ignore + + +def get_model_config(model: type[pydantic.BaseModel]) -> Any: + if PYDANTIC_V2: + return model.model_config + return model.__config__ # type: ignore + + +def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: + if PYDANTIC_V2: + return model.model_fields + return model.__fields__ # type: ignore + + +def model_copy(model: _ModelT) -> _ModelT: + if PYDANTIC_V2: + return model.model_copy() + return model.copy() # type: ignore + + +def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: + if PYDANTIC_V2: + return model.model_dump_json(indent=indent) + return model.json(indent=indent) # type: ignore + + +def model_dump( + model: pydantic.BaseModel, + *, + exclude_unset: bool = False, + exclude_defaults: bool = False, +) -> dict[str, Any]: + if PYDANTIC_V2: + return model.model_dump( + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + ) + return cast( + "dict[str, Any]", + model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + ), + ) + + +def model_parse(model: type[_ModelT], data: Any) -> _ModelT: + if PYDANTIC_V2: + return model.model_validate(data) + return model.parse_obj(data) # pyright: ignore[reportDeprecated] + + +# generic models +if TYPE_CHECKING: + + class GenericModel(pydantic.BaseModel): ... + +else: + if PYDANTIC_V2: + # there no longer needs to be a distinction in v2 but + # we still have to create our own subclass to avoid + # inconsistent MRO ordering errors + class GenericModel(pydantic.BaseModel): ... + + else: + import pydantic.generics + + class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ... 
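The compatibility helpers above give the rest of the SDK a single call site whether Pydantic v1 or v2 is installed: each function branches on `PYDANTIC_V2` and forwards to the matching v1 or v2 method. A minimal, self-contained sketch of that pattern, assuming a hypothetical `User` model that is not part of the SDK:

```python
import pydantic

PYDANTIC_V2 = pydantic.VERSION.startswith("2.")


class User(pydantic.BaseModel):
    name: str
    age: int


def parse_obj(model: type[pydantic.BaseModel], value: object) -> pydantic.BaseModel:
    # Validate `value` into `model` using whichever API the installed
    # Pydantic major version provides.
    if PYDANTIC_V2:
        return model.model_validate(value)
    return model.parse_obj(value)


def model_dump(instance: pydantic.BaseModel, *, exclude_unset: bool = False) -> dict:
    # Serialize back to a plain dict with the version-appropriate method.
    if PYDANTIC_V2:
        return instance.model_dump(exclude_unset=exclude_unset)
    return instance.dict(exclude_unset=exclude_unset)


user = parse_obj(User, {"name": "Ada", "age": 36})
print(model_dump(user))  # {'name': 'Ada', 'age': 36} on either major version
```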
+ + +# cached properties +if TYPE_CHECKING: + cached_property = property + + # we define a separate type (copied from typeshed) + # that represents that `cached_property` is `set`able + # at runtime, which differs from `@property`. + # + # this is a separate type as editors likely special case + # `@property` and we don't want to cause issues just to have + # more helpful internal types. + + class typed_cached_property(Generic[_T]): # noqa: N801 + func: Callable[[Any], _T] + attrname: str | None + + def __init__(self, func: Callable[[Any], _T]) -> None: ... + + @overload + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ... + + @overload + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ... + + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self: + raise NotImplementedError() + + def __set_name__(self, owner: type[Any], name: str) -> None: ... + + # __set__ is not defined at runtime, but @cached_property is designed to be settable + def __set__(self, instance: object, value: _T) -> None: ... +else: + try: + from functools import cached_property + except ImportError: + from cached_property import cached_property + + typed_cached_property = cached_property diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py new file mode 100644 index 0000000000..5e9a7e0a98 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_models.py @@ -0,0 +1,671 @@ +from __future__ import annotations + +import inspect +import os +from collections.abc import Callable +from datetime import date, datetime +from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, TypeGuard, TypeVar, cast + +import pydantic +import pydantic.generics +from pydantic.fields import FieldInfo +from typing_extensions import ( + ParamSpec, + Protocol, + override, + runtime_checkable, +) + +from ._base_compat import ( + PYDANTIC_V2, + ConfigDict, + field_get_default, + get_args, + get_model_config, + get_model_fields, + get_origin, + is_literal_type, + is_union, + parse_obj, +) +from ._base_compat import ( + GenericModel as BaseGenericModel, +) +from ._base_type import ( + IncEx, + ModelT, +) +from ._utils import ( + PropertyInfo, + coerce_boolean, + extract_type_arg, + is_annotated_type, + is_list, + is_mapping, + parse_date, + parse_datetime, + strip_annotated_type, +) + +if TYPE_CHECKING: + from pydantic_core.core_schema import LiteralSchema, ModelField, ModelFieldsSchema + +__all__ = ["BaseModel", "GenericModel"] +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +_T = TypeVar("_T") +P = ParamSpec("P") + + +@runtime_checkable +class _ConfigProtocol(Protocol): + allow_population_by_field_name: bool + + +class BaseModel(pydantic.BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict( + extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true")) + ) + else: + + @property + @override + def model_fields_set(self) -> set[str]: + # a forwards-compat shim for pydantic v2 + return self.__fields_set__ # type: ignore + + class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] + extra: Any = pydantic.Extra.allow # type: ignore + + def to_dict( + self, + *, + mode: Literal["json", "python"] = "python", + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + 
warnings: bool = True, + ) -> dict[str, object]: + """Recursively generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + mode: + If mode is 'json', the dictionary will only contain JSON serializable types. e.g. `datetime` will be turned into a string, `"2024-3-22T18:11:19.117000Z"`. + If mode is 'python', the dictionary may contain any Python objects. e.g. `datetime(2024, 3, 22)` + + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + warnings: Whether to log warnings when invalid fields are encountered. This is only supported in Pydantic v2. + """ # noqa: E501 + return self.model_dump( + mode=mode, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + def to_json( + self, + *, + indent: int | None = 2, + use_api_names: bool = True, + exclude_unset: bool = True, + exclude_defaults: bool = False, + exclude_none: bool = False, + warnings: bool = True, + ) -> str: + """Generates a JSON string representing this model as it would be received from or sent to the API (but with indentation). + + By default, fields that were not set by the API will not be included, + and keys will match the API response, *not* the property names from the model. + + For example, if the API responds with `"fooBar": true` but we've defined a `foo_bar: bool` property, + the output will use the `"fooBar"` key (unless `use_api_names=False` is passed). + + Args: + indent: Indentation to use in the JSON output. If `None` is passed, the output will be compact. Defaults to `2` + use_api_names: Whether to use the key that the API responded with or the property name. Defaults to `True`. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + warnings: Whether to show any warnings that occurred during serialization. This is only supported in Pydantic v2. + """ # noqa: E501 + return self.model_dump_json( + indent=indent, + by_alias=use_api_names, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + warnings=warnings, + ) + + @override + def __str__(self) -> str: + # mypy complains about an invalid self arg + return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + + # Override the 'construct' method in a way that supports recursive parsing without validation. + # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. 
+ @classmethod + @override + def construct( + cls: type[ModelT], + _fields_set: set[str] | None = None, + **values: object, + ) -> ModelT: + m = cls.__new__(cls) + fields_values: dict[str, object] = {} + + config = get_model_config(cls) + populate_by_name = ( + config.allow_population_by_field_name + if isinstance(config, _ConfigProtocol) + else config.get("populate_by_name") + ) + + if _fields_set is None: + _fields_set = set() + + model_fields = get_model_fields(cls) + for name, field in model_fields.items(): + key = field.alias + if key is None or (key not in values and populate_by_name): + key = name + + if key in values: + fields_values[name] = _construct_field(value=values[key], field=field, key=key) + _fields_set.add(name) + else: + fields_values[name] = field_get_default(field) + + _extra = {} + for key, value in values.items(): + if key not in model_fields: + if PYDANTIC_V2: + _extra[key] = value + else: + _fields_set.add(key) + fields_values[key] = value + + object.__setattr__(m, "__dict__", fields_values) # noqa: PLC2801 + + if PYDANTIC_V2: + # these properties are copied from Pydantic's `model_construct()` method + object.__setattr__(m, "__pydantic_private__", None) # noqa: PLC2801 + object.__setattr__(m, "__pydantic_extra__", _extra) # noqa: PLC2801 + object.__setattr__(m, "__pydantic_fields_set__", _fields_set) # noqa: PLC2801 + else: + # init_private_attributes() does not exist in v2 + m._init_private_attributes() # type: ignore + + # copied from Pydantic v1's `construct()` method + object.__setattr__(m, "__fields_set__", _fields_set) # noqa: PLC2801 + + return m + + if not TYPE_CHECKING: + # type checkers incorrectly complain about this assignment + # because the type signatures are technically different + # although not in practice + model_construct = construct + + if not PYDANTIC_V2: + # we define aliases for some of the new pydantic v2 methods so + # that we can just document these methods without having to specify + # a specific pydantic version as some users may not know which + # pydantic version they are currently using + + @override + def model_dump( + self, + *, + mode: Literal["json", "python"] | str = "python", + include: IncEx = None, + exclude: IncEx = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + round_trip: bool = False, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, + ) -> dict[str, Any]: + """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump + + Generate a dictionary representation of the model, optionally specifying which fields to include or exclude. + + Args: + mode: The mode in which `to_python` should run. + If mode is 'json', the dictionary will only contain JSON serializable types. + If mode is 'python', the dictionary may contain any Python objects. + include: A list of fields to include in the output. + exclude: A list of fields to exclude from the output. + by_alias: Whether to use the field's alias in the dictionary key if defined. + exclude_unset: Whether to exclude fields that are unset or None from the output. + exclude_defaults: Whether to exclude fields that are set to their default value from the output. + exclude_none: Whether to exclude fields that have a value of `None` from the output. + round_trip: Whether to enable serialization and deserialization round-trip support. 
+ warnings: Whether to log warnings when invalid fields are encountered. + + Returns: + A dictionary representation of the model. + """ + if mode != "python": + raise ValueError("mode is only supported in Pydantic v2") + if round_trip != False: + raise ValueError("round_trip is only supported in Pydantic v2") + if warnings != True: + raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") + return super().dict( # pyright: ignore[reportDeprecated] + include=include, + exclude=exclude, + by_alias=by_alias, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + ) + + @override + def model_dump_json( + self, + *, + indent: int | None = None, + include: IncEx = None, + exclude: IncEx = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + round_trip: bool = False, + warnings: bool | Literal["none", "warn", "error"] = True, + context: dict[str, Any] | None = None, + serialize_as_any: bool = False, + ) -> str: + """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json + + Generates a JSON representation of the model using Pydantic's `to_json` method. + + Args: + indent: Indentation to use in the JSON output. If None is passed, the output will be compact. + include: Field(s) to include in the JSON output. Can take either a string or set of strings. + exclude: Field(s) to exclude from the JSON output. Can take either a string or set of strings. + by_alias: Whether to serialize using field aliases. + exclude_unset: Whether to exclude fields that have not been explicitly set. + exclude_defaults: Whether to exclude fields that have the default value. + exclude_none: Whether to exclude fields that have a value of `None`. + round_trip: Whether to use serialization/deserialization between JSON and class instance. + warnings: Whether to show any warnings that occurred during serialization. + + Returns: + A JSON string representation of the model. 
+ """ + if round_trip != False: + raise ValueError("round_trip is only supported in Pydantic v2") + if warnings != True: + raise ValueError("warnings is only supported in Pydantic v2") + if context is not None: + raise ValueError("context is only supported in Pydantic v2") + if serialize_as_any != False: + raise ValueError("serialize_as_any is only supported in Pydantic v2") + return super().json( # type: ignore[reportDeprecated] + indent=indent, + include=include, + exclude=exclude, + by_alias=by_alias, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + ) + + +def _construct_field(value: object, field: FieldInfo, key: str) -> object: + if value is None: + return field_get_default(field) + + if PYDANTIC_V2: + type_ = field.annotation + else: + type_ = cast(type, field.outer_type_) # type: ignore + + if type_ is None: + raise RuntimeError(f"Unexpected field type is None for {key}") + + return construct_type(value=value, type_=type_) + + +def is_basemodel(type_: type) -> bool: + """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`""" + if is_union(type_): + return any(is_basemodel(variant) for variant in get_args(type_)) + + return is_basemodel_type(type_) + + +def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: + origin = get_origin(type_) or type_ + return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) + + +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. + + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + +def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: + """Loose coercion to the expected type with construction of nested values. + + Note: the returned value from this function is not guaranteed to match the + given type. + """ + return cast(_T, construct_type(value=value, type_=type_)) + + +def construct_type(*, value: object, type_: type) -> object: + """Loose coercion to the expected type with construction of nested values. + + If the given value does not match the expected type then it is returned as-is. + """ + # we allow `object` as the input type because otherwise, passing things like + # `Literal['value']` will be reported as a type error by type checkers + type_ = cast("type[object]", type_) + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(type_): + meta: tuple[Any, ...] = get_args(type_)[1:] + type_ = extract_type_arg(type_, 0) + else: + meta = () + # we need to use the origin class for any types that are subscripted generics + # e.g. Dict[str, object] + origin = get_origin(type_) or type_ + args = get_args(type_) + + if is_union(origin): + try: + return validate_type(type_=cast("type[object]", type_), value=value) + except Exception: + pass + + # if the type is a discriminated union then we want to construct the right variant + # in the union, even if the data doesn't match exactly, otherwise we'd break code + # that relies on the constructed class types, e.g. 
+ # + # class FooType: + # kind: Literal['foo'] + # value: str + # + # class BarType: + # kind: Literal['bar'] + # value: int + # + # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then + # we'd end up constructing `FooType` when it should be `BarType`. + discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta) + if discriminator and is_mapping(value): + variant_value = value.get(discriminator.field_alias_from or discriminator.field_name) + if variant_value and isinstance(variant_value, str): + variant_type = discriminator.mapping.get(variant_value) + if variant_type: + return construct_type(type_=variant_type, value=value) + + # if the data is not valid, use the first variant that doesn't fail while deserializing + for variant in args: + try: + return construct_type(value=value, type_=variant) + except Exception: + continue + + raise RuntimeError(f"Could not convert data into a valid instance of {type_}") + if origin == dict: + if not is_mapping(value): + return value + + _, items_type = get_args(type_) # Dict[_, items_type] + return {key: construct_type(value=item, type_=items_type) for key, item in value.items()} + + if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)): + if is_list(value): + return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value] + + if is_mapping(value): + if issubclass(type_, BaseModel): + return type_.construct(**value) # type: ignore[arg-type] + + return cast(Any, type_).construct(**value) + + if origin == list: + if not is_list(value): + return value + + inner_type = args[0] # List[inner_type] + return [construct_type(value=entry, type_=inner_type) for entry in value] + + if origin == float: + if isinstance(value, int): + coerced = float(value) + if coerced != value: + return value + return coerced + + return value + + if type_ == datetime: + try: + return parse_datetime(value) # type: ignore + except Exception: + return value + + if type_ == date: + try: + return parse_date(value) # type: ignore + except Exception: + return value + + return value + + +@runtime_checkable +class CachedDiscriminatorType(Protocol): + __discriminator__: DiscriminatorDetails + + +class DiscriminatorDetails: + field_name: str + """The name of the discriminator field in the variant class, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] + ``` + + Will result in field_name='type' + """ + + field_alias_from: str | None + """The name of the discriminator field in the API response, e.g. + + ```py + class Foo(BaseModel): + type: Literal['foo'] = Field(alias='type_from_api') + ``` + + Will result in field_alias_from='type_from_api' + """ + + mapping: dict[str, type] + """Mapping of discriminator value to variant type, e.g. 
+ + {'foo': FooVariant, 'bar': BarVariant} + """ + + def __init__( + self, + *, + mapping: dict[str, type], + discriminator_field: str, + discriminator_alias: str | None, + ) -> None: + self.mapping = mapping + self.field_name = discriminator_field + self.field_alias_from = discriminator_alias + + +def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None: + if isinstance(union, CachedDiscriminatorType): + return union.__discriminator__ + + discriminator_field_name: str | None = None + + for annotation in meta_annotations: + if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None: + discriminator_field_name = annotation.discriminator + break + + if not discriminator_field_name: + return None + + mapping: dict[str, type] = {} + discriminator_alias: str | None = None + + for variant in get_args(union): + variant = strip_annotated_type(variant) + if is_basemodel_type(variant): + if PYDANTIC_V2: + field = _extract_field_schema_pv2(variant, discriminator_field_name) + if not field: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field.get("serialization_alias") + + field_schema = field["schema"] + + if field_schema["type"] == "literal": + for entry in cast("LiteralSchema", field_schema)["expected"]: + if isinstance(entry, str): + mapping[entry] = variant + else: + field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name) # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + if not field_info: + continue + + # Note: if one variant defines an alias then they all should + discriminator_alias = field_info.alias + + if field_info.annotation and is_literal_type(field_info.annotation): + for entry in get_args(field_info.annotation): + if isinstance(entry, str): + mapping[entry] = variant + + if not mapping: + return None + + details = DiscriminatorDetails( + mapping=mapping, + discriminator_field=discriminator_field_name, + discriminator_alias=discriminator_alias, + ) + cast(CachedDiscriminatorType, union).__discriminator__ = details + return details + + +def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: + schema = model.__pydantic_core_schema__ + if schema["type"] != "model": + return None + + fields_schema = schema["schema"] + if fields_schema["type"] != "model-fields": + return None + + fields_schema = cast("ModelFieldsSchema", fields_schema) + + field = fields_schema["fields"].get(field_name) + if not field: + return None + + return cast("ModelField", field) # pyright: ignore[reportUnnecessaryCast] + + +def validate_type(*, type_: type[_T], value: object) -> _T: + """Strict validation that the given value matches the expected type""" + if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel): + return cast(_T, parse_obj(type_, value)) + + return cast(_T, _validate_non_model_type(type_=type_, value=value)) + + +# our use of subclasssing here causes weirdness for type checkers, +# so we just pretend that we don't subclass +if TYPE_CHECKING: + GenericModel = BaseModel +else: + + class GenericModel(BaseGenericModel, BaseModel): + pass + + +if PYDANTIC_V2: + from pydantic import TypeAdapter + + def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: + return TypeAdapter(type_).validate_python(value) + +elif not TYPE_CHECKING: + + class TypeAdapter(Generic[_T]): + """Used as a placeholder to easily convert runtime types to a Pydantic format + to provide 
validation. + + For example: + ```py + validated = RootModel[int](__root__="5").__root__ + # validated: 5 + ``` + """ + + def __init__(self, type_: type[_T]): + self.type_ = type_ + + def validate_python(self, value: Any) -> _T: + if not isinstance(value, self.type_): + raise ValueError(f"Invalid type: {value} is not of type {self.type_}") + return value + + def _validate_non_model_type(*, type_: type[_T], value: object) -> _T: + return TypeAdapter(type_).validate_python(value) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py index 7a91f9b796..ea1d3f09dc 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_base_type.py @@ -1,11 +1,21 @@ from __future__ import annotations -from collections.abc import Mapping, Sequence +from collections.abc import Callable, Mapping, Sequence from os import PathLike -from typing import IO, TYPE_CHECKING, Any, Literal, TypeVar, Union +from typing import ( + IO, + TYPE_CHECKING, + Any, + Literal, + Optional, + TypeAlias, + TypeVar, + Union, +) import pydantic -from typing_extensions import override +from httpx import Response +from typing_extensions import Protocol, TypedDict, override, runtime_checkable Query = Mapping[str, object] Body = object @@ -22,7 +32,7 @@ else: # Sentinel class used until PEP 0661 is accepted -class NotGiven(pydantic.BaseModel): +class NotGiven: """ A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior). @@ -50,7 +60,7 @@ NotGivenOr = Union[_T, NotGiven] NOT_GIVEN = NotGiven() -class Omit(pydantic.BaseModel): +class Omit: """In certain situations you need to be able to represent a case where a default value has to be explicitly removed and `None` is not an appropriate substitute, for example: @@ -71,37 +81,90 @@ class Omit(pydantic.BaseModel): return False +@runtime_checkable +class ModelBuilderProtocol(Protocol): + @classmethod + def build( + cls: type[_T], + *, + response: Response, + data: object, + ) -> _T: ... + + Headers = Mapping[str, Union[str, Omit]] + +class HeadersLikeProtocol(Protocol): + def get(self, __key: str) -> str | None: ... + + +HeadersLike = Union[Headers, HeadersLikeProtocol] + ResponseT = TypeVar( "ResponseT", - bound="Union[str, None, BaseModel, list[Any], Dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol," - " BinaryResponseContent]", + bound="Union[str, None, BaseModel, list[Any], dict[str, Any], Response, UnknownResponse, ModelBuilderProtocol, BinaryResponseContent]", # noqa: E501 ) +StrBytesIntFloat = Union[str, bytes, int, float] + +# Note: copied from Pydantic +# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 +IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None" + +PostParser = Callable[[Any], Any] + + +@runtime_checkable +class InheritsGeneric(Protocol): + """Represents a type that has inherited from `Generic` + + The `__orig_bases__` property can be used to determine the resolved + type variable for a given base class. 
+ """ + + __orig_bases__: tuple[_GenericAlias] + + +class _GenericAlias(Protocol): + __origin__: type[object] + + +class HttpxSendArgs(TypedDict, total=False): + auth: httpx.Auth + + # for user input files if TYPE_CHECKING: + Base64FileInput = Union[IO[bytes], PathLike[str]] FileContent = Union[IO[bytes], bytes, PathLike[str]] else: + Base64FileInput = Union[IO[bytes], PathLike] FileContent = Union[IO[bytes], bytes, PathLike] FileTypes = Union[ - FileContent, # file content - tuple[str, FileContent], # (filename, file) - tuple[str, FileContent, str], # (filename, file , content_type) - tuple[str, FileContent, str, Mapping[str, str]], # (filename, file , content_type, headers) + # file (or bytes) + FileContent, + # (filename, file (or bytes)) + tuple[Optional[str], FileContent], + # (filename, file (or bytes), content_type) + tuple[Optional[str], FileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]], ] - RequestFiles = Union[Mapping[str, FileTypes], Sequence[tuple[str, FileTypes]]] -# for httpx client supported files - +# duplicate of the above but without our custom file support HttpxFileContent = Union[bytes, IO[bytes]] HttpxFileTypes = Union[ - FileContent, # file content - tuple[str, HttpxFileContent], # (filename, file) - tuple[str, HttpxFileContent, str], # (filename, file , content_type) - tuple[str, HttpxFileContent, str, Mapping[str, str]], # (filename, file , content_type, headers) + # file (or bytes) + HttpxFileContent, + # (filename, file (or bytes)) + tuple[Optional[str], HttpxFileContent], + # (filename, file (or bytes), content_type) + tuple[Optional[str], HttpxFileContent, Optional[str]], + # (filename, file (or bytes), content_type, headers) + tuple[Optional[str], HttpxFileContent, Optional[str], Mapping[str, str]], ] HttpxRequestFiles = Union[Mapping[str, HttpxFileTypes], Sequence[tuple[str, HttpxFileTypes]]] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py new file mode 100644 index 0000000000..8e43bdebec --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_constants.py @@ -0,0 +1,12 @@ +import httpx + +RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" +# 通过 `Timeout` 控制接口`connect` 和 `read` 超时时间,默认为`timeout=300.0, connect=8.0` +ZHIPUAI_DEFAULT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=8.0) +# 通过 `retry` 参数控制重试次数,默认为3次 +ZHIPUAI_DEFAULT_MAX_RETRIES = 3 +# 通过 `Limits` 控制最大连接数和保持连接数,默认为`max_connections=50, max_keepalive_connections=10` +ZHIPUAI_DEFAULT_LIMITS = httpx.Limits(max_connections=50, max_keepalive_connections=10) + +INITIAL_RETRY_DELAY = 0.5 +MAX_RETRY_DELAY = 8.0 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py index 1027c1bc5b..e2c9d24c6c 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_errors.py @@ -13,6 +13,7 @@ __all__ = [ "APIResponseError", "APIResponseValidationError", "APITimeoutError", + "APIConnectionError", ] @@ -24,7 +25,7 @@ class ZhipuAIError(Exception): super().__init__(message) -class APIStatusError(Exception): +class APIStatusError(ZhipuAIError): response: httpx.Response status_code: int @@ -49,7 +50,7 @@ class APIInternalError(APIStatusError): ... 
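In the `_constants.py` defaults added above, `ZHIPUAI_DEFAULT_TIMEOUT` controls the connect and read timeouts (`timeout=300.0, connect=8.0`), `ZHIPUAI_DEFAULT_MAX_RETRIES` allows 3 retries, and `ZHIPUAI_DEFAULT_LIMITS` caps the pool at 50 connections with 10 kept alive. A short sketch of building the same kind of httpx objects with custom values; the client wiring below is illustrative only, since the exact ZhipuAI client constructor arguments are not shown here:

```python
import httpx

# Custom values in the same shape as the ZHIPUAI_DEFAULT_* constants above.
custom_timeout = httpx.Timeout(timeout=60.0, connect=5.0)
custom_limits = httpx.Limits(max_connections=20, max_keepalive_connections=5)

# A plain httpx.Client accepts these objects directly; how the SDK client
# exposes them to callers may differ, so treat this as a sketch.
http_client = httpx.Client(timeout=custom_timeout, limits=custom_limits)
```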
class APIServerFlowExceedError(APIStatusError): ... -class APIResponseError(Exception): +class APIResponseError(ZhipuAIError): message: str request: httpx.Request json_data: object @@ -75,9 +76,11 @@ class APIResponseValidationError(APIResponseError): self.status_code = response.status_code -class APITimeoutError(Exception): - request: httpx.Request +class APIConnectionError(APIResponseError): + def __init__(self, *, message: str = "Connection error.", request: httpx.Request) -> None: + super().__init__(message, request, json_data=None) - def __init__(self, request: httpx.Request): - self.request = request - super().__init__("Request Timeout") + +class APITimeoutError(APIConnectionError): + def __init__(self, request: httpx.Request) -> None: + super().__init__(message="Request timed out.", request=request) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py index 0796bfe11c..f9d2e14d9e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_files.py @@ -2,40 +2,74 @@ from __future__ import annotations import io import os -from collections.abc import Mapping, Sequence -from pathlib import Path +import pathlib +from typing import TypeGuard, overload -from ._base_type import FileTypes, HttpxFileTypes, HttpxRequestFiles, RequestFiles +from ._base_type import ( + Base64FileInput, + FileContent, + FileTypes, + HttpxFileContent, + HttpxFileTypes, + HttpxRequestFiles, + RequestFiles, +) +from ._utils import is_mapping_t, is_sequence_t, is_tuple_t -def is_file_content(obj: object) -> bool: +def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]: + return isinstance(obj, io.IOBase | os.PathLike) + + +def is_file_content(obj: object) -> TypeGuard[FileContent]: return isinstance(obj, bytes | tuple | io.IOBase | os.PathLike) +def assert_is_file_content(obj: object, *, key: str | None = None) -> None: + if not is_file_content(obj): + prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`" + raise RuntimeError( + f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/openai/openai-python/tree/main#file-uploads" + ) from None + + +@overload +def to_httpx_files(files: None) -> None: ... + + +@overload +def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... 
+ + +def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: + if files is None: + return None + + if is_mapping_t(files): + files = {key: _transform_file(file) for key, file in files.items()} + elif is_sequence_t(files): + files = [(key, _transform_file(file)) for key, file in files] + else: + raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence") + + return files + + def _transform_file(file: FileTypes) -> HttpxFileTypes: if is_file_content(file): if isinstance(file, os.PathLike): - path = Path(file) - return path.name, path.read_bytes() - else: - return file - if isinstance(file, tuple): - if isinstance(file[1], os.PathLike): - return (file[0], Path(file[1]).read_bytes(), *file[2:]) - else: - return (file[0], file[1], *file[2:]) - else: - raise TypeError(f"Unexpected input file with type {type(file)},Expected FileContent type or tuple type") + path = pathlib.Path(file) + return (path.name, path.read_bytes()) + + return file + + if is_tuple_t(file): + return (file[0], _read_file_content(file[1]), *file[2:]) + + raise TypeError("Expected file types input to be a FileContent type or to be a tuple") -def make_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: - if files is None: - return None - - if isinstance(files, Mapping): - files = {key: _transform_file(file) for key, file in files.items()} - elif isinstance(files, Sequence): - files = [(key, _transform_file(file)) for key, file in files] - else: - raise TypeError(f"Unexpected input file with type {type(files)}, excepted Mapping or Sequence") - return files +def _read_file_content(file: FileContent) -> HttpxFileContent: + if isinstance(file, os.PathLike): + return pathlib.Path(file).read_bytes() + return file diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py index 5f7f6d04f2..d0f933d814 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_http_client.py @@ -1,23 +1,70 @@ from __future__ import annotations import inspect -from collections.abc import Mapping -from typing import Any, Union, cast +import logging +import time +import warnings +from collections.abc import Iterator, Mapping +from itertools import starmap +from random import random +from typing import TYPE_CHECKING, Any, Generic, Literal, Optional, TypeVar, Union, cast, overload import httpx import pydantic from httpx import URL, Timeout -from tenacity import retry -from tenacity.stop import stop_after_attempt -from . import _errors -from ._base_type import NOT_GIVEN, AnyMapping, Body, Data, Headers, NotGiven, Query, RequestFiles, ResponseT -from ._errors import APIResponseValidationError, APIStatusError, APITimeoutError -from ._files import make_httpx_files -from ._request_opt import ClientRequestParam, UserRequestInput -from ._response import HttpResponse +from . 
import _errors, get_origin +from ._base_compat import model_copy +from ._base_models import GenericModel, construct_type, validate_type +from ._base_type import ( + NOT_GIVEN, + AnyMapping, + Body, + Data, + Headers, + HttpxSendArgs, + ModelBuilderProtocol, + NotGiven, + Omit, + PostParser, + Query, + RequestFiles, + ResponseT, +) +from ._constants import ( + INITIAL_RETRY_DELAY, + MAX_RETRY_DELAY, + RAW_RESPONSE_HEADER, + ZHIPUAI_DEFAULT_LIMITS, + ZHIPUAI_DEFAULT_MAX_RETRIES, + ZHIPUAI_DEFAULT_TIMEOUT, +) +from ._errors import APIConnectionError, APIResponseValidationError, APIStatusError, APITimeoutError +from ._files import to_httpx_files +from ._legacy_response import LegacyAPIResponse +from ._request_opt import FinalRequestOptions, UserRequestInput +from ._response import APIResponse, BaseAPIResponse, extract_response_type from ._sse_client import StreamResponse -from ._utils import flatten +from ._utils import flatten, is_given, is_mapping + +log: logging.Logger = logging.getLogger(__name__) + +# TODO: make base page type vars covariant +SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]") +# AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]") + +_T = TypeVar("_T") +_T_co = TypeVar("_T_co", covariant=True) + +if TYPE_CHECKING: + from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT +else: + try: + from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + except ImportError: + # taken from https://github.com/encode/httpx/blob/3ba5fe0d7ac70222590e759c31442b1cab263791/httpx/_config.py#L366 + HTTPX_DEFAULT_TIMEOUT = Timeout(5.0) + headers = { "Accept": "application/json", @@ -25,50 +72,180 @@ headers = { } -def _merge_map(map1: Mapping, map2: Mapping) -> Mapping: - merged = {**map1, **map2} - return {key: val for key, val in merged.items() if val is not None} +class PageInfo: + """Stores the necessary information to build the request to retrieve the next page. + + Either `url` or `params` must be set. + """ + + url: URL | NotGiven + params: Query | NotGiven + + @overload + def __init__( + self, + *, + url: URL, + ) -> None: ... + + @overload + def __init__( + self, + *, + params: Query, + ) -> None: ... + + def __init__( + self, + *, + url: URL | NotGiven = NOT_GIVEN, + params: Query | NotGiven = NOT_GIVEN, + ) -> None: + self.url = url + self.params = params -from itertools import starmap +class BasePage(GenericModel, Generic[_T]): + """ + Defines the core interface for pagination. -from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT + Type Args: + ModelT: The pydantic model that represents an item in the response. -ZHIPUAI_DEFAULT_TIMEOUT = httpx.Timeout(timeout=300.0, connect=8.0) -ZHIPUAI_DEFAULT_MAX_RETRIES = 3 -ZHIPUAI_DEFAULT_LIMITS = httpx.Limits(max_connections=5, max_keepalive_connections=5) + Methods: + has_next_page(): Check if there is another page available + next_page_info(): Get the necessary information to make a request for the next page + """ + + _options: FinalRequestOptions = pydantic.PrivateAttr() + _model: type[_T] = pydantic.PrivateAttr() + + def has_next_page(self) -> bool: + items = self._get_page_items() + if not items: + return False + return self.next_page_info() is not None + + def next_page_info(self) -> Optional[PageInfo]: ... + + def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body] + ... + + def _params_from_url(self, url: URL) -> httpx.QueryParams: + # TODO: do we have to preprocess params here? 
+ return httpx.QueryParams(cast(Any, self._options.params)).merge(url.params) + + def _info_to_options(self, info: PageInfo) -> FinalRequestOptions: + options = model_copy(self._options) + options._strip_raw_response_header() + + if not isinstance(info.params, NotGiven): + options.params = {**options.params, **info.params} + return options + + if not isinstance(info.url, NotGiven): + params = self._params_from_url(info.url) + url = info.url.copy_with(params=params) + options.params = dict(url.params) + options.url = str(url) + return options + + raise ValueError("Unexpected PageInfo state") + + +class BaseSyncPage(BasePage[_T], Generic[_T]): + _client: HttpClient = pydantic.PrivateAttr() + + def _set_private_attributes( + self, + client: HttpClient, + model: type[_T], + options: FinalRequestOptions, + ) -> None: + self._model = model + self._client = client + self._options = options + + # Pydantic uses a custom `__iter__` method to support casting BaseModels + # to dictionaries. e.g. dict(model). + # As we want to support `for item in page`, this is inherently incompatible + # with the default pydantic behaviour. It is not possible to support both + # use cases at once. Fortunately, this is not a big deal as all other pydantic + # methods should continue to work as expected as there is an alternative method + # to cast a model to a dictionary, model.dict(), which is used internally + # by pydantic. + def __iter__(self) -> Iterator[_T]: # type: ignore + for page in self.iter_pages(): + yield from page._get_page_items() + + def iter_pages(self: SyncPageT) -> Iterator[SyncPageT]: + page = self + while True: + yield page + if page.has_next_page(): + page = page.get_next_page() + else: + return + + def get_next_page(self: SyncPageT) -> SyncPageT: + info = self.next_page_info() + if not info: + raise RuntimeError( + "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`." + ) + + options = self._info_to_options(info) + return self._client._request_api_list(self._model, page=self.__class__, options=options) class HttpClient: _client: httpx.Client _version: str _base_url: URL - + max_retries: int timeout: Union[float, Timeout, None] _limits: httpx.Limits _has_custom_http_client: bool _default_stream_cls: type[StreamResponse[Any]] | None = None + _strict_response_validation: bool + def __init__( self, *, version: str, base_url: URL, + _strict_response_validation: bool, + max_retries: int = ZHIPUAI_DEFAULT_MAX_RETRIES, timeout: Union[float, Timeout, None], + limits: httpx.Limits | None = None, custom_httpx_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, ) -> None: - if timeout is None or isinstance(timeout, NotGiven): + if limits is not None: + warnings.warn( + "The `connection_pool_limits` argument is deprecated. 
The `http_client` argument should be passed instead", # noqa: E501 + category=DeprecationWarning, + stacklevel=3, + ) + if custom_httpx_client is not None: + raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") + else: + limits = ZHIPUAI_DEFAULT_LIMITS + + if not is_given(timeout): if custom_httpx_client and custom_httpx_client.timeout != HTTPX_DEFAULT_TIMEOUT: timeout = custom_httpx_client.timeout else: timeout = ZHIPUAI_DEFAULT_TIMEOUT - self.timeout = cast(Timeout, timeout) + self.max_retries = max_retries + self.timeout = timeout + self._limits = limits self._has_custom_http_client = bool(custom_httpx_client) self._client = custom_httpx_client or httpx.Client( base_url=base_url, timeout=self.timeout, - limits=ZHIPUAI_DEFAULT_LIMITS, + limits=limits, ) self._version = version url = URL(url=base_url) @@ -76,6 +253,7 @@ class HttpClient: url = url.copy_with(raw_path=url.raw_path + b"/") self._base_url = url self._custom_headers = custom_headers or {} + self._strict_response_validation = _strict_response_validation def _prepare_url(self, url: str) -> URL: sub_url = URL(url) @@ -93,55 +271,101 @@ class HttpClient: "ZhipuAI-SDK-Ver": self._version, "source_type": "zhipu-sdk-python", "x-request-sdk": "zhipu-sdk-python", - **self._auth_headers, + **self.auth_headers, **self._custom_headers, } @property - def _auth_headers(self): + def custom_auth(self) -> httpx.Auth | None: + return None + + @property + def auth_headers(self): return {} - def _prepare_headers(self, request_param: ClientRequestParam) -> httpx.Headers: - custom_headers = request_param.headers or {} - headers_dict = _merge_map(self._default_headers, custom_headers) + def _prepare_headers(self, options: FinalRequestOptions) -> httpx.Headers: + custom_headers = options.headers or {} + headers_dict = _merge_mappings(self._default_headers, custom_headers) httpx_headers = httpx.Headers(headers_dict) return httpx_headers - def _prepare_request(self, request_param: ClientRequestParam) -> httpx.Request: + def _remaining_retries( + self, + remaining_retries: Optional[int], + options: FinalRequestOptions, + ) -> int: + return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries) + + def _calculate_retry_timeout( + self, + remaining_retries: int, + options: FinalRequestOptions, + response_headers: Optional[httpx.Headers] = None, + ) -> float: + max_retries = options.get_max_retries(self.max_retries) + + # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says. + # retry_after = self._parse_retry_after_header(response_headers) + # if retry_after is not None and 0 < retry_after <= 60: + # return retry_after + + nb_retries = max_retries - remaining_retries + + # Apply exponential backoff, but not more than the max. + sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY) + + # Apply some jitter, plus-or-minus half a second. 
+ jitter = 1 - 0.25 * random() + timeout = sleep_seconds * jitter + return max(timeout, 0) + + def _build_request(self, options: FinalRequestOptions) -> httpx.Request: kwargs: dict[str, Any] = {} - json_data = request_param.json_data - headers = self._prepare_headers(request_param) - url = self._prepare_url(request_param.url) - json_data = request_param.json_data + headers = self._prepare_headers(options) + url = self._prepare_url(options.url) + json_data = options.json_data + if options.extra_json is not None: + if json_data is None: + json_data = cast(Body, options.extra_json) + elif is_mapping(json_data): + json_data = _merge_mappings(json_data, options.extra_json) + else: + raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") + + content_type = headers.get("Content-Type") + # multipart/form-data; boundary=---abc-- if headers.get("Content-Type") == "multipart/form-data": - headers.pop("Content-Type") + if "boundary" not in content_type: + # only remove the header if the boundary hasn't been explicitly set + # as the caller doesn't want httpx to come up with their own boundary + headers.pop("Content-Type") if json_data: kwargs["data"] = self._make_multipartform(json_data) return self._client.build_request( headers=headers, - timeout=self.timeout if isinstance(request_param.timeout, NotGiven) else request_param.timeout, - method=request_param.method, + timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout, + method=options.method, url=url, json=json_data, - files=request_param.files, - params=request_param.params, + files=options.files, + params=options.params, **kwargs, ) - def _object_to_formdata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]: + def _object_to_formfata(self, key: str, value: Data | Mapping[object, object]) -> list[tuple[str, str]]: items = [] if isinstance(value, Mapping): for k, v in value.items(): - items.extend(self._object_to_formdata(f"{key}[{k}]", v)) + items.extend(self._object_to_formfata(f"{key}[{k}]", v)) return items if isinstance(value, list | tuple): for v in value: - items.extend(self._object_to_formdata(key + "[]", v)) + items.extend(self._object_to_formfata(key + "[]", v)) return items def _primitive_value_to_str(val) -> str: @@ -161,7 +385,7 @@ class HttpClient: return [(key, str_data)] def _make_multipartform(self, data: Mapping[object, object]) -> dict[str, object]: - items = flatten(list(starmap(self._object_to_formdata, data.items()))) + items = flatten(list(starmap(self._object_to_formfata, data.items()))) serialized: dict[str, object] = {} for key, value in items: @@ -170,20 +394,6 @@ class HttpClient: serialized[key] = value return serialized - def _parse_response( - self, - *, - cast_type: type[ResponseT], - response: httpx.Response, - enable_stream: bool, - request_param: ClientRequestParam, - stream_cls: type[StreamResponse[Any]] | None = None, - ) -> HttpResponse: - http_response = HttpResponse( - raw_response=response, cast_type=cast_type, client=self, enable_stream=enable_stream, stream_cls=stream_cls - ) - return http_response.parse() - def _process_response_data( self, *, @@ -194,14 +404,58 @@ class HttpClient: if data is None: return cast(ResponseT, None) - try: - if inspect.isclass(cast_type) and issubclass(cast_type, pydantic.BaseModel): - return cast(ResponseT, cast_type.validate(data)) + if cast_type is object: + return cast(ResponseT, data) - return cast(ResponseT, pydantic.TypeAdapter(cast_type).validate_python(data)) + try: 
+ if inspect.isclass(cast_type) and issubclass(cast_type, ModelBuilderProtocol): + return cast(ResponseT, cast_type.build(response=response, data=data)) + + if self._strict_response_validation: + return cast(ResponseT, validate_type(type_=cast_type, value=data)) + + return cast(ResponseT, construct_type(type_=cast_type, value=data)) except pydantic.ValidationError as err: raise APIResponseValidationError(response=response, json_data=data) from err + def _should_stream_response_body(self, request: httpx.Request) -> bool: + return request.headers.get(RAW_RESPONSE_HEADER) == "stream" # type: ignore[no-any-return] + + def _should_retry(self, response: httpx.Response) -> bool: + # Note: this is not a standard header + should_retry_header = response.headers.get("x-should-retry") + + # If the server explicitly says whether or not to retry, obey. + if should_retry_header == "true": + log.debug("Retrying as header `x-should-retry` is set to `true`") + return True + if should_retry_header == "false": + log.debug("Not retrying as header `x-should-retry` is set to `false`") + return False + + # Retry on request timeouts. + if response.status_code == 408: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry on lock timeouts. + if response.status_code == 409: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry on rate limits. + if response.status_code == 429: + log.debug("Retrying due to status code %i", response.status_code) + return True + + # Retry internal errors. + if response.status_code >= 500: + log.debug("Retrying due to status code %i", response.status_code) + return True + + log.debug("Not retrying") + return False + def is_closed(self) -> bool: return self._client.is_closed @@ -214,117 +468,385 @@ class HttpClient: def __exit__(self, exc_type, exc_val, exc_tb): self.close() - @retry(stop=stop_after_attempt(ZHIPUAI_DEFAULT_MAX_RETRIES)) def request( + self, + cast_type: type[ResponseT], + options: FinalRequestOptions, + remaining_retries: Optional[int] = None, + *, + stream: bool = False, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT | StreamResponse: + return self._request( + cast_type=cast_type, + options=options, + stream=stream, + stream_cls=stream_cls, + remaining_retries=remaining_retries, + ) + + def _request( self, *, cast_type: type[ResponseT], - params: ClientRequestParam, - enable_stream: bool = False, - stream_cls: type[StreamResponse[Any]] | None = None, + options: FinalRequestOptions, + remaining_retries: int | None, + stream: bool, + stream_cls: type[StreamResponse] | None, ) -> ResponseT | StreamResponse: - request = self._prepare_request(params) + retries = self._remaining_retries(remaining_retries, options) + request = self._build_request(options) + kwargs: HttpxSendArgs = {} + if self.custom_auth is not None: + kwargs["auth"] = self.custom_auth try: response = self._client.send( request, - stream=enable_stream, + stream=stream or self._should_stream_response_body(request=request), + **kwargs, ) - response.raise_for_status() except httpx.TimeoutException as err: + log.debug("Encountered httpx.TimeoutException", exc_info=True) + + if retries > 0: + return self._retry_request( + options, + cast_type, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising timeout error") raise APITimeoutError(request=request) from err - except httpx.HTTPStatusError as err: - err.response.read() - # raise err + except Exception as err: + 
log.debug("Encountered Exception", exc_info=True) + + if retries > 0: + return self._retry_request( + options, + cast_type, + retries, + stream=stream, + stream_cls=stream_cls, + response_headers=None, + ) + + log.debug("Raising connection error") + raise APIConnectionError(request=request) from err + + log.debug( + 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase + ) + + try: + response.raise_for_status() + except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code + log.debug("Encountered httpx.HTTPStatusError", exc_info=True) + + if retries > 0 and self._should_retry(err.response): + err.response.close() + return self._retry_request( + options, + cast_type, + retries, + err.response.headers, + stream=stream, + stream_cls=stream_cls, + ) + + # If the response is streamed then we need to explicitly read the response + # to completion before attempting to access the response text. + if not err.response.is_closed: + err.response.read() + + log.debug("Re-raising status error") raise self._make_status_error(err.response) from None - except Exception as err: - raise err - - return self._parse_response( + # return self._parse_response( + # cast_type=cast_type, + # options=options, + # response=response, + # stream=stream, + # stream_cls=stream_cls, + # ) + return self._process_response( cast_type=cast_type, - request_param=params, + options=options, response=response, - enable_stream=enable_stream, + stream=stream, stream_cls=stream_cls, ) + def _retry_request( + self, + options: FinalRequestOptions, + cast_type: type[ResponseT], + remaining_retries: int, + response_headers: httpx.Headers | None, + *, + stream: bool, + stream_cls: type[StreamResponse] | None, + ) -> ResponseT | StreamResponse: + remaining = remaining_retries - 1 + if remaining == 1: + log.debug("1 retry left") + else: + log.debug("%i retries left", remaining) + + timeout = self._calculate_retry_timeout(remaining, options, response_headers) + log.info("Retrying request to %s in %f seconds", options.url, timeout) + + # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a + # different thread if necessary. 
+ time.sleep(timeout) + + return self._request( + options=options, + cast_type=cast_type, + remaining_retries=remaining, + stream=stream, + stream_cls=stream_cls, + ) + + def _process_response( + self, + *, + cast_type: type[ResponseT], + options: FinalRequestOptions, + response: httpx.Response, + stream: bool, + stream_cls: type[StreamResponse] | None, + ) -> ResponseT: + # _legacy_response with raw_response_header to paser method + if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": + return cast( + ResponseT, + LegacyAPIResponse( + raw=response, + client=self, + cast_type=cast_type, + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + origin = get_origin(cast_type) or cast_type + + if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if not issubclass(origin, APIResponse): + raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}") + + response_cls = cast("type[BaseAPIResponse[Any]]", cast_type) + return cast( + ResponseT, + response_cls( + raw=response, + client=self, + cast_type=extract_response_type(response_cls), + stream=stream, + stream_cls=stream_cls, + options=options, + ), + ) + + if cast_type == httpx.Response: + return cast(ResponseT, response) + + api_response = APIResponse( + raw=response, + client=self, + cast_type=cast("type[ResponseT]", cast_type), # pyright: ignore[reportUnnecessaryCast] + stream=stream, + stream_cls=stream_cls, + options=options, + ) + if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): + return cast(ResponseT, api_response) + + return api_response.parse() + + def _request_api_list( + self, + model: type[object], + page: type[SyncPageT], + options: FinalRequestOptions, + ) -> SyncPageT: + def _parser(resp: SyncPageT) -> SyncPageT: + resp._set_private_attributes( + client=self, + model=model, + options=options, + ) + return resp + + options.post_parser = _parser + + return self.request(page, options, stream=False) + + @overload + def get( + self, + path: str, + *, + cast_type: type[ResponseT], + options: UserRequestInput = {}, + stream: Literal[False] = False, + ) -> ResponseT: ... + + @overload + def get( + self, + path: str, + *, + cast_type: type[ResponseT], + options: UserRequestInput = {}, + stream: Literal[True], + stream_cls: type[StreamResponse], + ) -> StreamResponse: ... + + @overload + def get( + self, + path: str, + *, + cast_type: type[ResponseT], + options: UserRequestInput = {}, + stream: bool, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT | StreamResponse: ... + def get( self, path: str, *, cast_type: type[ResponseT], options: UserRequestInput = {}, - enable_stream: bool = False, - ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct(method="get", url=path, **options) - return self.request(cast_type=cast_type, params=opts, enable_stream=enable_stream) + stream: bool = False, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT: + opts = FinalRequestOptions.construct(method="get", url=path, **options) + return cast(ResponseT, self.request(cast_type, opts, stream=stream, stream_cls=stream_cls)) + + @overload + def post( + self, + path: str, + *, + cast_type: type[ResponseT], + body: Body | None = None, + options: UserRequestInput = {}, + files: RequestFiles | None = None, + stream: Literal[False] = False, + ) -> ResponseT: ... 
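The retry path above (`_should_retry`, `_retry_request`, `_calculate_retry_timeout`) replaces the old tenacity decorator with explicit exponential backoff plus jitter, built on the constants added in `_constants.py`. Below is a small standalone sketch of the delay it computes, assuming the default `ZHIPUAI_DEFAULT_MAX_RETRIES = 3`; illustrative only, not part of the diff.

```python
# Standalone sketch of _calculate_retry_timeout using the constants from this
# patch; not the patch's code.
from random import random

INITIAL_RETRY_DELAY = 0.5
MAX_RETRY_DELAY = 8.0
DEFAULT_MAX_RETRIES = 3


def retry_timeout(remaining_retries: int, max_retries: int = DEFAULT_MAX_RETRIES) -> float:
    nb_retries = max_retries - remaining_retries
    # Exponential backoff, capped at MAX_RETRY_DELAY.
    sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
    # Jitter trims up to a quarter off the delay, as in the hunk above.
    jitter = 1 - 0.25 * random()
    return max(sleep_seconds * jitter, 0)


# _retry_request decrements the retry budget before computing the delay, so with
# the default of 3 retries the pre-jitter delays come out as 1s, 2s and 4s:
for remaining in (2, 1, 0):
    print(f"{remaining} retries left after this one: ~{retry_timeout(remaining):.2f}s")
```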
+ + @overload + def post( + self, + path: str, + *, + cast_type: type[ResponseT], + body: Body | None = None, + options: UserRequestInput = {}, + files: RequestFiles | None = None, + stream: Literal[True], + stream_cls: type[StreamResponse], + ) -> StreamResponse: ... + + @overload + def post( + self, + path: str, + *, + cast_type: type[ResponseT], + body: Body | None = None, + options: UserRequestInput = {}, + files: RequestFiles | None = None, + stream: bool, + stream_cls: type[StreamResponse] | None = None, + ) -> ResponseT | StreamResponse: ... def post( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, files: RequestFiles | None = None, - enable_stream: bool = False, + stream: bool = False, stream_cls: type[StreamResponse[Any]] | None = None, ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct( - method="post", json_data=body, files=make_httpx_files(files), url=path, **options + opts = FinalRequestOptions.construct( + method="post", url=path, json_data=body, files=to_httpx_files(files), **options ) - return self.request(cast_type=cast_type, params=opts, enable_stream=enable_stream, stream_cls=stream_cls) + return cast(ResponseT, self.request(cast_type, opts, stream=stream, stream_cls=stream_cls)) def patch( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, ) -> ResponseT: - opts = ClientRequestParam.construct(method="patch", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options) return self.request( cast_type=cast_type, - params=opts, + options=opts, ) def put( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, files: RequestFiles | None = None, ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct( - method="put", url=path, json_data=body, files=make_httpx_files(files), **options + opts = FinalRequestOptions.construct( + method="put", url=path, json_data=body, files=to_httpx_files(files), **options ) return self.request( cast_type=cast_type, - params=opts, + options=opts, ) def delete( self, path: str, *, - body: Body | None = None, cast_type: type[ResponseT], + body: Body | None = None, options: UserRequestInput = {}, ) -> ResponseT | StreamResponse: - opts = ClientRequestParam.construct(method="delete", url=path, json_data=body, **options) + opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options) return self.request( cast_type=cast_type, - params=opts, + options=opts, ) + def get_api_list( + self, + path: str, + *, + model: type[object], + page: type[SyncPageT], + body: Body | None = None, + options: UserRequestInput = {}, + method: str = "get", + ) -> SyncPageT: + opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options) + return self._request_api_list(model, page, opts) + def _make_status_error(self, response) -> APIStatusError: response_text = response.text.strip() status_code = response.status_code @@ -343,24 +865,46 @@ class HttpClient: return APIStatusError(message=error_msg, response=response) -def make_user_request_input( - max_retries: int | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - extra_headers: Headers = None, - extra_body: Body | None = None, +def make_request_options( + *, query: Query | None = None, + 
extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + post_parser: PostParser | NotGiven = NOT_GIVEN, ) -> UserRequestInput: + """Create a dict of type RequestOptions without keys of NotGiven values.""" options: UserRequestInput = {} - if extra_headers is not None: options["headers"] = extra_headers - if max_retries is not None: - options["max_retries"] = max_retries - if not isinstance(timeout, NotGiven): - options["timeout"] = timeout - if query is not None: - options["params"] = query + if extra_body is not None: options["extra_json"] = cast(AnyMapping, extra_body) + if query is not None: + options["params"] = query + + if extra_query is not None: + options["params"] = {**options.get("params", {}), **extra_query} + + if not isinstance(timeout, NotGiven): + options["timeout"] = timeout + + if is_given(post_parser): + # internal + options["post_parser"] = post_parser # type: ignore + return options + + +def _merge_mappings( + obj1: Mapping[_T_co, Union[_T, Omit]], + obj2: Mapping[_T_co, Union[_T, Omit]], +) -> dict[_T_co, _T]: + """Merge two mappings of the same type, removing any values that are instances of `Omit`. + + In cases with duplicate keys the second mapping takes precedence. + """ + merged = {**obj1, **obj2} + return {key: value for key, value in merged.items() if not isinstance(value, Omit)} diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py index b0a91d04a9..21f158a5f4 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_jwt_token.py @@ -3,9 +3,11 @@ import time import cachetools.func import jwt -API_TOKEN_TTL_SECONDS = 3 * 60 +# 缓存时间 3分钟 +CACHE_TTL_SECONDS = 3 * 60 -CACHE_TTL_SECONDS = API_TOKEN_TTL_SECONDS - 30 +# token 有效期比缓存时间 多30秒 +API_TOKEN_TTL_SECONDS = CACHE_TTL_SECONDS + 30 @cachetools.func.ttl_cache(maxsize=10, ttl=CACHE_TTL_SECONDS) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py new file mode 100644 index 0000000000..51623bd860 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_binary_response.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +import os +from collections.abc import AsyncIterator, Iterator +from typing import Any + +import httpx + + +class HttpxResponseContent: + @property + def content(self) -> bytes: + raise NotImplementedError("This method is not implemented for this class.") + + @property + def text(self) -> str: + raise NotImplementedError("This method is not implemented for this class.") + + @property + def encoding(self) -> str | None: + raise NotImplementedError("This method is not implemented for this class.") + + @property + def charset_encoding(self) -> str | None: + raise NotImplementedError("This method is not implemented for this class.") + + def json(self, **kwargs: Any) -> Any: + raise NotImplementedError("This method is not implemented for this class.") + + def read(self) -> bytes: + raise NotImplementedError("This method is not implemented for this class.") + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + raise NotImplementedError("This method is not implemented 
for this class.") + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + def iter_lines(self) -> Iterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]: + raise NotImplementedError("This method is not implemented for this class.") + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + def close(self) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + async def aread(self) -> bytes: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_lines(self) -> AsyncIterator[str]: + raise NotImplementedError("This method is not implemented for this class.") + + async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + raise NotImplementedError("This method is not implemented for this class.") + + async def astream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + async def aclose(self) -> None: + raise NotImplementedError("This method is not implemented for this class.") + + +class HttpxBinaryResponseContent(HttpxResponseContent): + response: httpx.Response + + def __init__(self, response: httpx.Response) -> None: + self.response = response + + @property + def content(self) -> bytes: + return self.response.content + + @property + def encoding(self) -> str | None: + return self.response.encoding + + @property + def charset_encoding(self) -> str | None: + return self.response.charset_encoding + + def read(self) -> bytes: + return self.response.read() + + def text(self) -> str: + raise NotImplementedError("Not implemented for binary response content") + + def json(self, **kwargs: Any) -> Any: + raise NotImplementedError("Not implemented for binary response content") + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + def iter_lines(self) -> Iterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + async def aiter_lines(self) -> AsyncIterator[str]: + raise NotImplementedError("Not implemented for binary response content") + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_bytes(chunk_size) + + def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]: + return self.response.iter_raw(chunk_size) + + def write_to_file( + self, + file: str | os.PathLike[str], + ) -> None: + """Write the output to 
the given file. + + Accepts a filename or any path-like object, e.g. pathlib.Path + + Note: if you want to stream the data to the file instead of writing + all at once then you should use `.with_streaming_response` when making + the API request, e.g. `client.with_streaming_response.foo().stream_to_file('my_filename.txt')` + """ + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(): + f.write(data) + + def stream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + with open(file, mode="wb") as f: + for data in self.response.iter_bytes(chunk_size): + f.write(data) + + def close(self) -> None: + return self.response.close() + + async def aread(self) -> bytes: + return await self.response.aread() + + async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_bytes(chunk_size) + + async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]: + return self.response.aiter_raw(chunk_size) + + async def astream_to_file( + self, + file: str | os.PathLike[str], + *, + chunk_size: int | None = None, + ) -> None: + path = anyio.Path(file) + async with await path.open(mode="wb") as f: + async for data in self.response.aiter_bytes(chunk_size): + await f.write(data) + + async def aclose(self) -> None: + return await self.response.aclose() + + +class HttpxTextBinaryResponseContent(HttpxBinaryResponseContent): + response: httpx.Response + + @property + def text(self) -> str: + return self.response.text + + def json(self, **kwargs: Any) -> Any: + return self.response.json(**kwargs) + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + return self.response.iter_text(chunk_size) + + def iter_lines(self) -> Iterator[str]: + return self.response.iter_lines() + + async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]: + return self.response.aiter_text(chunk_size) + + async def aiter_lines(self) -> AsyncIterator[str]: + return self.response.aiter_lines() diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py new file mode 100644 index 0000000000..47183b9eee --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_legacy_response.py @@ -0,0 +1,341 @@ +from __future__ import annotations + +import datetime +import functools +import inspect +import logging +from collections.abc import Callable +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, get_origin, overload + +import httpx +import pydantic +from typing_extensions import ParamSpec, override + +from ._base_models import BaseModel, is_basemodel +from ._base_type import NoneType +from ._constants import RAW_RESPONSE_HEADER +from ._errors import APIResponseValidationError +from ._legacy_binary_response import HttpxResponseContent, HttpxTextBinaryResponseContent +from ._sse_client import StreamResponse, extract_stream_chunk_type, is_stream_class_type +from ._utils import extract_type_arg, is_annotated_type, is_given + +if TYPE_CHECKING: + from ._http_client import HttpClient + from ._request_opt import FinalRequestOptions + +P = ParamSpec("P") +R = TypeVar("R") +_T = TypeVar("_T") + +log: logging.Logger = logging.getLogger(__name__) + + +class LegacyAPIResponse(Generic[R]): + """This is a legacy class as it will be replaced by `APIResponse` + and `AsyncAPIResponse` in the `_response.py` file in the next 
major + release. + + For the sync client this will mostly be the same with the exception + of `content` & `text` will be methods instead of properties. In the + async client, all methods will be async. + + A migration script will be provided & the migration in general should + be smooth. + """ + + _cast_type: type[R] + _client: HttpClient + _parsed_by_type: dict[type[Any], Any] + _stream: bool + _stream_cls: type[StreamResponse[Any]] | None + _options: FinalRequestOptions + + http_response: httpx.Response + + def __init__( + self, + *, + raw: httpx.Response, + cast_type: type[R], + client: HttpClient, + stream: bool, + stream_cls: type[StreamResponse[Any]] | None, + options: FinalRequestOptions, + ) -> None: + self._cast_type = cast_type + self._client = client + self._parsed_by_type = {} + self._stream = stream + self._stream_cls = stream_cls + self._options = options + self.http_response = raw + + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: ... + + @overload + def parse(self) -> R: ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + NOTE: For the async client: this will become a coroutine in the next major version. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. + + ```py + from zhipuai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_type + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + @property + def headers(self) -> httpx.Headers: + return self.http_response.headers + + @property + def http_request(self) -> httpx.Request: + return self.http_response.request + + @property + def status_code(self) -> int: + return self.http_response.status_code + + @property + def url(self) -> httpx.URL: + return self.http_response.url + + @property + def method(self) -> str: + return self.http_request.method + + @property + def content(self) -> bytes: + """Return the binary response content. + + NOTE: this will be removed in favour of `.read()` in the + next major version. + """ + return self.http_response.content + + @property + def text(self) -> str: + """Return the decoded response content. + + NOTE: this will be turned into a method in the next major version. 
+ """ + return self.http_response.text + + @property + def http_version(self) -> str: + return self.http_response.http_version + + @property + def is_closed(self) -> bool: + return self.http_response.is_closed + + @property + def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" + return self.http_response.elapsed + + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) + + if self._stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {StreamResponse}") + + return cast( + _T, + to( + cast_type=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. StreamResponse[ChunkType]", # noqa: E501 + ), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_type=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + stream_cls = cast("type[StreamResponse[Any]] | None", self._client._default_stream_cls) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_type=self._cast_type, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + cast_type = to if to is not None else self._cast_type + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_type): + cast_type = extract_type_arg(cast_type, 0) + + if cast_type is NoneType: + return cast(R, None) + + response = self.http_response + if cast_type == str: + return cast(R, response.text) + + if cast_type == int: + return cast(R, int(response.text)) + + if cast_type == float: + return cast(R, float(response.text)) + + origin = get_origin(cast_type) or cast_type + + if inspect.isclass(origin) and issubclass(origin, HttpxResponseContent): + # in the response, e.g. mime file + *_, filename = response.headers.get("content-disposition", "").split("filename=") + # 判断文件类型是jsonl类型的使用HttpxTextBinaryResponseContent + if filename and filename.endswith(".jsonl") or filename and filename.endswith(".xlsx"): + return cast(R, HttpxTextBinaryResponseContent(response)) + else: + return cast(R, cast_type(response)) # type: ignore + + if origin == LegacyAPIResponse: + raise RuntimeError("Unexpected state - cast_type is `APIResponse`") + + if inspect.isclass(origin) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_type != httpx.Response: + raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_type`") + return cast(R, response) + + if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + raise TypeError("Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`") + + if ( + cast_type is not object + and origin is not list + and origin is not dict + and origin is not Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_type} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." # noqa: E501 + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if content_type != "application/json": + if is_basemodel(cast_type): + try: + data = response.json() + except Exception as exc: + log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc) + else: + return self._client._process_response_data( + data=data, + cast_type=cast_type, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", # noqa: E501 + json_data=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() + + return self._client._process_response_data( + data=data, + cast_type=cast_type, # type: ignore + response=response, + ) + + @override + def __repr__(self) -> str: + return f"" + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", # noqa: E501 + ) + + +def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]: + """Higher order function that takes one of our bound API methods and wraps it + to support returning the raw `APIResponse` object directly. 
+ """ + + @functools.wraps(func) + def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]: + extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})} + extra_headers[RAW_RESPONSE_HEADER] = "true" + + kwargs["extra_headers"] = extra_headers + + return cast(LegacyAPIResponse[R], func(*args, **kwargs)) + + return wrapped diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py index ac459151fc..c3b894b3a3 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_request_opt.py @@ -1,48 +1,97 @@ from __future__ import annotations -from typing import Any, ClassVar, Union +from collections.abc import Callable +from typing import TYPE_CHECKING, Any, ClassVar, Union, cast +import pydantic.generics from httpx import Timeout -from pydantic import ConfigDict -from typing_extensions import TypedDict, Unpack +from typing_extensions import Required, TypedDict, Unpack, final -from ._base_type import Body, Headers, HttpxRequestFiles, NotGiven, Query -from ._utils import remove_notgiven_indict +from ._base_compat import PYDANTIC_V2, ConfigDict +from ._base_type import AnyMapping, Body, Headers, HttpxRequestFiles, NotGiven, Query +from ._constants import RAW_RESPONSE_HEADER +from ._utils import is_given, strip_not_given class UserRequestInput(TypedDict, total=False): + headers: Headers max_retries: int timeout: float | Timeout | None + params: Query + extra_json: AnyMapping + + +class FinalRequestOptionsInput(TypedDict, total=False): + method: Required[str] + url: Required[str] + params: Query headers: Headers - params: Query | None + max_retries: int + timeout: float | Timeout | None + files: HttpxRequestFiles | None + json_data: Body + extra_json: AnyMapping -class ClientRequestParam: +@final +class FinalRequestOptions(pydantic.BaseModel): method: str url: str - max_retries: Union[int, NotGiven] = NotGiven() - timeout: Union[float, NotGiven] = NotGiven() - headers: Union[Headers, NotGiven] = NotGiven() - json_data: Union[Body, None] = None - files: Union[HttpxRequestFiles, None] = None params: Query = {} - model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) + headers: Union[Headers, NotGiven] = NotGiven() + max_retries: Union[int, NotGiven] = NotGiven() + timeout: Union[float, Timeout, None, NotGiven] = NotGiven() + files: Union[HttpxRequestFiles, None] = None + idempotency_key: Union[str, None] = None + post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven() - def get_max_retries(self, max_retries) -> int: + # It should be noted that we cannot use `json` here as that would override + # a BaseModel method in an incompatible fashion. 
+ json_data: Union[Body, None] = None + extra_json: Union[AnyMapping, None] = None + + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True) + else: + + class Config(pydantic.BaseConfig): # pyright: ignore[reportDeprecated] + arbitrary_types_allowed: bool = True + + def get_max_retries(self, max_retries: int) -> int: if isinstance(self.max_retries, NotGiven): return max_retries return self.max_retries + def _strip_raw_response_header(self) -> None: + if not is_given(self.headers): + return + + if self.headers.get(RAW_RESPONSE_HEADER): + self.headers = {**self.headers} + self.headers.pop(RAW_RESPONSE_HEADER) + + # override the `construct` method so that we can run custom transformations. + # this is necessary as we don't want to do any actual runtime type checking + # (which means we can't use validators) but we do want to ensure that `NotGiven` + # values are not present + # + # type ignore required because we're adding explicit types to `**values` @classmethod def construct( # type: ignore cls, _fields_set: set[str] | None = None, **values: Unpack[UserRequestInput], - ) -> ClientRequestParam: - kwargs: dict[str, Any] = {key: remove_notgiven_indict(value) for key, value in values.items()} - client = cls() - client.__dict__.update(kwargs) + ) -> FinalRequestOptions: + kwargs: dict[str, Any] = { + # we unconditionally call `strip_not_given` on any value + # as it will just ignore any non-mapping types + key: strip_not_given(value) + for key, value in values.items() + } + if PYDANTIC_V2: + return super().model_construct(_fields_set, **kwargs) + return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs)) # pyright: ignore[reportDeprecated] - return client - - model_construct = construct + if not TYPE_CHECKING: + # type checkers incorrectly complain about this assignment + model_construct = construct diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py index 56e60a7934..45443da662 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_response.py @@ -1,87 +1,193 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING, Any, Generic, TypeVar, cast, get_args, get_origin +import inspect +import logging +from collections.abc import Iterator +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Union, cast, get_origin, overload import httpx import pydantic -from typing_extensions import ParamSpec +from typing_extensions import ParamSpec, override +from ._base_models import BaseModel, is_basemodel from ._base_type import NoneType -from ._sse_client import StreamResponse +from ._errors import APIResponseValidationError, ZhipuAIError +from ._sse_client import StreamResponse, extract_stream_chunk_type, is_stream_class_type +from ._utils import extract_type_arg, extract_type_var_from_base, is_annotated_type, is_given if TYPE_CHECKING: from ._http_client import HttpClient + from ._request_opt import FinalRequestOptions P = ParamSpec("P") R = TypeVar("R") +_T = TypeVar("_T") +_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]") +log: logging.Logger = logging.getLogger(__name__) -class HttpResponse(Generic[R]): +class BaseAPIResponse(Generic[R]): _cast_type: type[R] _client: HttpClient - _parsed: R | None - _enable_stream: bool + _parsed_by_type: dict[type[Any], Any] + _is_sse_stream: 
bool _stream_cls: type[StreamResponse[Any]] + _options: FinalRequestOptions http_response: httpx.Response def __init__( self, *, - raw_response: httpx.Response, + raw: httpx.Response, cast_type: type[R], client: HttpClient, - enable_stream: bool = False, + stream: bool, stream_cls: type[StreamResponse[Any]] | None = None, + options: FinalRequestOptions, ) -> None: self._cast_type = cast_type self._client = client - self._parsed = None + self._parsed_by_type = {} + self._is_sse_stream = stream self._stream_cls = stream_cls - self._enable_stream = enable_stream - self.http_response = raw_response + self._options = options + self.http_response = raw - def parse(self) -> R: - self._parsed = self._parse() - return self._parsed + def _parse(self, *, to: type[_T] | None = None) -> R | _T: + # unwrap `Annotated[T, ...]` -> `T` + if to and is_annotated_type(to): + to = extract_type_arg(to, 0) - def _parse(self) -> R: - if self._enable_stream: - self._parsed = cast( - R, - self._stream_cls( - cast_type=cast(type, get_args(self._stream_cls)[0]), - response=self.http_response, - client=self._client, - ), - ) - return self._parsed - cast_type = self._cast_type - if cast_type is NoneType: - return cast(R, None) - http_response = self.http_response - if cast_type == str: - return cast(R, http_response.text) + if self._is_sse_stream: + if to: + if not is_stream_class_type(to): + raise TypeError(f"Expected custom parse type to be a subclass of {StreamResponse}") - content_type, *_ = http_response.headers.get("content-type", "application/json").split(";") - origin = get_origin(cast_type) or cast_type - if content_type != "application/json": - if issubclass(origin, pydantic.BaseModel): - data = http_response.json() - return self._client._process_response_data( - data=data, - cast_type=cast_type, # type: ignore - response=http_response, + return cast( + _T, + to( + cast_type=extract_stream_chunk_type( + to, + failure_message="Expected custom stream type to be passed with a type argument, e.g. 
StreamResponse[ChunkType]", # noqa: E501 + ), + response=self.http_response, + client=cast(Any, self._client), + ), ) - return http_response.text + if self._stream_cls: + return cast( + R, + self._stream_cls( + cast_type=extract_stream_chunk_type(self._stream_cls), + response=self.http_response, + client=cast(Any, self._client), + ), + ) - data = http_response.json() + stream_cls = cast("type[Stream[Any]] | None", self._client._default_stream_cls) + if stream_cls is None: + raise MissingStreamClassError() + + return cast( + R, + stream_cls( + cast_type=self._cast_type, + response=self.http_response, + client=cast(Any, self._client), + ), + ) + + cast_type = to if to is not None else self._cast_type + + # unwrap `Annotated[T, ...]` -> `T` + if is_annotated_type(cast_type): + cast_type = extract_type_arg(cast_type, 0) + + if cast_type is NoneType: + return cast(R, None) + + response = self.http_response + if cast_type == str: + return cast(R, response.text) + + if cast_type == bytes: + return cast(R, response.content) + + if cast_type == int: + return cast(R, int(response.text)) + + if cast_type == float: + return cast(R, float(response.text)) + + origin = get_origin(cast_type) or cast_type + + # handle the legacy binary response case + if inspect.isclass(cast_type) and cast_type.__name__ == "HttpxBinaryResponseContent": + return cast(R, cast_type(response)) # type: ignore + + if origin == APIResponse: + raise RuntimeError("Unexpected state - cast_type is `APIResponse`") + + if inspect.isclass(origin) and issubclass(origin, httpx.Response): + # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response + # and pass that class to our request functions. We cannot change the variance to be either + # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct + # the response class ourselves but that is something that should be supported directly in httpx + # as it would be easy to incorrectly construct the Response object due to the multitude of arguments. + if cast_type != httpx.Response: + raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_type`") + return cast(R, response) + + if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") + + if ( + cast_type is not object + and origin is not list + and origin is not dict + and origin is not Union + and not issubclass(origin, BaseModel) + ): + raise RuntimeError( + f"Unsupported type, expected {cast_type} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}." # noqa: E501 + ) + + # split is required to handle cases where additional information is included + # in the response, e.g. 
application/json; charset=utf-8 + content_type, *_ = response.headers.get("content-type", "*").split(";") + if content_type != "application/json": + if is_basemodel(cast_type): + try: + data = response.json() + except Exception as exc: + log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc) + else: + return self._client._process_response_data( + data=data, + cast_type=cast_type, # type: ignore + response=response, + ) + + if self._client._strict_response_validation: + raise APIResponseValidationError( + response=response, + message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.", # noqa: E501 + json_data=response.text, + ) + + # If the API responds with content that isn't JSON then we just return + # the (decoded) text without performing any parsing so that you can still + # handle the response however you need to. + return response.text # type: ignore + + data = response.json() return self._client._process_response_data( data=data, cast_type=cast_type, # type: ignore - response=http_response, + response=response, ) @property @@ -90,6 +196,7 @@ class HttpResponse(Generic[R]): @property def http_request(self) -> httpx.Request: + """Returns the httpx Request instance associated with the current response.""" return self.http_response.request @property @@ -98,24 +205,194 @@ class HttpResponse(Generic[R]): @property def url(self) -> httpx.URL: + """Returns the URL for which the request was made.""" return self.http_response.url @property def method(self) -> str: return self.http_request.method - @property - def content(self) -> bytes: - return self.http_response.content - - @property - def text(self) -> str: - return self.http_response.text - @property def http_version(self) -> str: return self.http_response.http_version @property def elapsed(self) -> datetime.timedelta: + """The time taken for the complete request/response cycle to complete.""" return self.http_response.elapsed + + @property + def is_closed(self) -> bool: + """Whether or not the response body has been closed. + + If this is False then there is response data that has not been read yet. + You must either fully consume the response body or call `.close()` + before discarding the response to prevent resource leaks. + """ + return self.http_response.is_closed + + @override + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_type}>" # noqa: E501 + + +class APIResponse(BaseAPIResponse[R]): + @property + def request_id(self) -> str | None: + return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] + + @overload + def parse(self, *, to: type[_T]) -> _T: ... + + @overload + def parse(self) -> R: ... + + def parse(self, *, to: type[_T] | None = None) -> R | _T: + """Returns the rich python representation of this response's data. + + For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`. + + You can customise the type that the response is parsed into through + the `to` argument, e.g. 
+ + ```py + from openai import BaseModel + + + class MyModel(BaseModel): + foo: str + + + obj = response.parse(to=MyModel) + print(obj.foo) + ``` + + We support parsing: + - `BaseModel` + - `dict` + - `list` + - `Union` + - `str` + - `int` + - `float` + - `httpx.Response` + """ + cache_key = to if to is not None else self._cast_type + cached = self._parsed_by_type.get(cache_key) + if cached is not None: + return cached # type: ignore[no-any-return] + + if not self._is_sse_stream: + self.read() + + parsed = self._parse(to=to) + if is_given(self._options.post_parser): + parsed = self._options.post_parser(parsed) + + self._parsed_by_type[cache_key] = parsed + return parsed + + def read(self) -> bytes: + """Read and return the binary response content.""" + try: + return self.http_response.read() + except httpx.StreamConsumed as exc: + # The default error raised by httpx isn't very + # helpful in our case so we re-raise it with + # a different error message. + raise StreamAlreadyConsumed() from exc + + def text(self) -> str: + """Read and decode the response content into a string.""" + self.read() + return self.http_response.text + + def json(self) -> object: + """Read and decode the JSON response content.""" + self.read() + return self.http_response.json() + + def close(self) -> None: + """Close the response and release the connection. + + Automatically called if the response body is read to completion. + """ + self.http_response.close() + + def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]: + """ + A byte-iterator over the decoded response content. + + This automatically handles gzip, deflate and brotli encoded responses. + """ + yield from self.http_response.iter_bytes(chunk_size) + + def iter_text(self, chunk_size: int | None = None) -> Iterator[str]: + """A str-iterator over the decoded response content + that handles both gzip, deflate, etc but also detects the content's + string encoding. + """ + yield from self.http_response.iter_text(chunk_size) + + def iter_lines(self) -> Iterator[str]: + """Like `iter_text()` but will only yield chunks for each line""" + yield from self.http_response.iter_lines() + + +class MissingStreamClassError(TypeError): + def __init__(self) -> None: + super().__init__( + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference", # noqa: E501 + ) + + +class StreamAlreadyConsumed(ZhipuAIError): # noqa: N818 + """ + Attempted to read or stream content, but the content has already + been streamed. + + This can happen if you use a method like `.iter_lines()` and then attempt + to read th entire response body afterwards, e.g. + + ```py + response = await client.post(...) + async for line in response.iter_lines(): + ... # do something with `line` + + content = await response.read() + # ^ error + ``` + + If you want this behaviour you'll need to either manually accumulate the response + content or call `await response.read()` before iterating over the stream. + """ + + def __init__(self) -> None: + message = ( + "Attempted to read or stream some content, but the content has " + "already been streamed. " + "This could be due to attempting to stream the response " + "content more than once." + "\n\n" + "You can fix this by manually accumulating the response content while streaming " + "or by calling `.read()` before starting to stream." 
+ ) + super().__init__(message) + + +def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type: + """Given a type like `APIResponse[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(APIResponse[bytes]): + ... + + extract_response_type(MyResponse) -> bytes + ``` + """ + return extract_type_var_from_base( + typ, + generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse)), + index=0, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py index ec2745d059..cbc449d244 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_sse_client.py @@ -1,13 +1,16 @@ from __future__ import annotations +import inspect import json from collections.abc import Iterator, Mapping -from typing import TYPE_CHECKING, Generic +from typing import TYPE_CHECKING, Generic, TypeGuard, cast import httpx +from . import get_origin from ._base_type import ResponseT from ._errors import APIResponseError +from ._utils import extract_type_var_from_base, is_mapping _FIELD_SEPARATOR = ":" @@ -53,8 +56,41 @@ class StreamResponse(Generic[ResponseT]): request=self.response.request, json_data=data["error"], ) + if sse.event is None: + data = sse.json_data() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + raise APIResponseError( + message=message, + request=self.response.request, + json_data=data["error"], + ) yield self._data_process_func(data=data, cast_type=self._cast_type, response=self.response) + + else: + data = sse.json_data() + + if sse.event == "error" and is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIResponseError( + message=message, + request=self.response.request, + json_data=data["error"], + ) + yield self._data_process_func(data=data, cast_type=self._cast_type, response=self.response) + for sse in iterator: pass @@ -138,3 +174,33 @@ class SSELineParser: except (TypeError, ValueError): pass return + + +def is_stream_class_type(typ: type) -> TypeGuard[type[StreamResponse[object]]]: + """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`""" + origin = get_origin(typ) or typ + return inspect.isclass(origin) and issubclass(origin, StreamResponse) + + +def extract_stream_chunk_type( + stream_cls: type, + *, + failure_message: str | None = None, +) -> type: + """Given a type like `StreamResponse[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyStream(StreamResponse[bytes]): + ... 
+ + extract_stream_chunk_type(MyStream) -> bytes + ``` + """ + + return extract_type_var_from_base( + stream_cls, + index=0, + generic_bases=cast("tuple[type, ...]", (StreamResponse,)), + failure_message=failure_message, + ) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py deleted file mode 100644 index 6b610567da..0000000000 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable, Mapping -from typing import TypeVar - -from ._base_type import NotGiven - - -def remove_notgiven_indict(obj): - if obj is None or (not isinstance(obj, Mapping)): - return obj - return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} - - -_T = TypeVar("_T") - - -def flatten(t: Iterable[Iterable[_T]]) -> list[_T]: - return [item for sublist in t for item in sublist] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py new file mode 100644 index 0000000000..a66b095816 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/__init__.py @@ -0,0 +1,52 @@ +from ._utils import ( # noqa: I001 + remove_notgiven_indict as remove_notgiven_indict, # noqa: PLC0414 + flatten as flatten, # noqa: PLC0414 + is_dict as is_dict, # noqa: PLC0414 + is_list as is_list, # noqa: PLC0414 + is_given as is_given, # noqa: PLC0414 + is_tuple as is_tuple, # noqa: PLC0414 + is_mapping as is_mapping, # noqa: PLC0414 + is_tuple_t as is_tuple_t, # noqa: PLC0414 + parse_date as parse_date, # noqa: PLC0414 + is_iterable as is_iterable, # noqa: PLC0414 + is_sequence as is_sequence, # noqa: PLC0414 + coerce_float as coerce_float, # noqa: PLC0414 + is_mapping_t as is_mapping_t, # noqa: PLC0414 + removeprefix as removeprefix, # noqa: PLC0414 + removesuffix as removesuffix, # noqa: PLC0414 + extract_files as extract_files, # noqa: PLC0414 + is_sequence_t as is_sequence_t, # noqa: PLC0414 + required_args as required_args, # noqa: PLC0414 + coerce_boolean as coerce_boolean, # noqa: PLC0414 + coerce_integer as coerce_integer, # noqa: PLC0414 + file_from_path as file_from_path, # noqa: PLC0414 + parse_datetime as parse_datetime, # noqa: PLC0414 + strip_not_given as strip_not_given, # noqa: PLC0414 + deepcopy_minimal as deepcopy_minimal, # noqa: PLC0414 + get_async_library as get_async_library, # noqa: PLC0414 + maybe_coerce_float as maybe_coerce_float, # noqa: PLC0414 + get_required_header as get_required_header, # noqa: PLC0414 + maybe_coerce_boolean as maybe_coerce_boolean, # noqa: PLC0414 + maybe_coerce_integer as maybe_coerce_integer, # noqa: PLC0414 + drop_prefix_image_data as drop_prefix_image_data, # noqa: PLC0414 +) + + +from ._typing import ( + is_list_type as is_list_type, # noqa: PLC0414 + is_union_type as is_union_type, # noqa: PLC0414 + extract_type_arg as extract_type_arg, # noqa: PLC0414 + is_iterable_type as is_iterable_type, # noqa: PLC0414 + is_required_type as is_required_type, # noqa: PLC0414 + is_annotated_type as is_annotated_type, # noqa: PLC0414 + strip_annotated_type as strip_annotated_type, # noqa: PLC0414 + extract_type_var_from_base as extract_type_var_from_base, # noqa: PLC0414 +) + +from ._transform import ( + PropertyInfo as PropertyInfo, # noqa: PLC0414 + transform as transform, # noqa: PLC0414 + 
async_transform as async_transform, # noqa: PLC0414 + maybe_transform as maybe_transform, # noqa: PLC0414 + async_maybe_transform as async_maybe_transform, # noqa: PLC0414 +) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py new file mode 100644 index 0000000000..e8ef1f7935 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_transform.py @@ -0,0 +1,383 @@ +from __future__ import annotations + +import base64 +import io +import pathlib +from collections.abc import Mapping +from datetime import date, datetime +from typing import Any, Literal, TypeVar, cast, get_args, get_type_hints + +import anyio +import pydantic +from typing_extensions import override + +from .._base_compat import is_typeddict, model_dump +from .._files import is_base64_file_input +from ._typing import ( + extract_type_arg, + is_annotated_type, + is_iterable_type, + is_list_type, + is_required_type, + is_union_type, + strip_annotated_type, +) +from ._utils import ( + is_iterable, + is_list, + is_mapping, +) + +_T = TypeVar("_T") + + +# TODO: support for drilling globals() and locals() +# TODO: ensure works correctly with forward references in all cases + + +PropertyFormat = Literal["iso8601", "base64", "custom"] + + +class PropertyInfo: + """Metadata class to be used in Annotated types to provide information about a given type. + + For example: + + class MyParams(TypedDict): + account_holder_name: Annotated[str, PropertyInfo(alias='accountHolderName')] + + This means that {'account_holder_name': 'Robert'} will be transformed to {'accountHolderName': 'Robert'} before being sent to the API. + """ # noqa: E501 + + alias: str | None + format: PropertyFormat | None + format_template: str | None + discriminator: str | None + + def __init__( + self, + *, + alias: str | None = None, + format: PropertyFormat | None = None, + format_template: str | None = None, + discriminator: str | None = None, + ) -> None: + self.alias = alias + self.format = format + self.format_template = format_template + self.discriminator = discriminator + + @override + def __repr__(self) -> str: + return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')" # noqa: E501 + + +def maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `transform()` that allows `None` to be passed. + + See `transform()` for more details. + """ + if data is None: + return None + return transform(data, expected_type) + + +# Wrapper over _transform_recursive providing fake types +def transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. 
+ """ + transformed = _transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +def _get_annotated_type(type_: type) -> type | None: + """If the given type is an `Annotated` type then it is returned, if not `None` is returned. + + This also unwraps the type when applicable, e.g. `Required[Annotated[T, ...]]` + """ + if is_required_type(type_): + # Unwrap `Required[Annotated[T, ...]]` to `Annotated[T, ...]` + type_ = get_args(type_)[0] + + if is_annotated_type(type_): + return type_ + + return None + + +def _maybe_transform_key(key: str, type_: type) -> str: + """Transform the given `data` based on the annotations provided in `type_`. + + Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata. + """ + annotated_type = _get_annotated_type(type_) + if annotated_type is None: + # no `Annotated` definition for this type, no transformation needed + return key + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.alias is not None: + return annotation.alias + + return key + + +def _transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return _transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. 
+ for subtype in get_args(stripped_type): + data = _transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return _format_data(data, annotation.format, annotation.format_template) + + return data + + +def _format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object: + if isinstance(data, date | datetime): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = data.read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + + return data + + +def _transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_) + return result + + +async def async_maybe_transform( + data: object, + expected_type: object, +) -> Any | None: + """Wrapper over `async_transform()` that allows `None` to be passed. + + See `async_transform()` for more details. + """ + if data is None: + return None + return await async_transform(data, expected_type) + + +async def async_transform( + data: _T, + expected_type: object, +) -> _T: + """Transform dictionaries based off of type information from the given type, for example: + + ```py + class Params(TypedDict, total=False): + card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]] + + + transformed = transform({"card_id": ""}, Params) + # {'cardID': ''} + ``` + + Any keys / data that does not have type information given will be included as is. + + It should be noted that the transformations that this function does are not represented in the type system. + """ + transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type)) + return cast(_T, transformed) + + +async def _async_transform_recursive( + data: object, + *, + annotation: type, + inner_type: type | None = None, +) -> object: + """Transform the given data against the expected type. + + Args: + annotation: The direct type annotation given to the particular piece of data. + This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc + + inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type + is a container type such as `List[T]`. 
In that case `inner_type` should be set to `T` so that each entry in + the list can be transformed using the metadata from the container type. + + Defaults to the same value as the `annotation` argument. + """ + if inner_type is None: + inner_type = annotation + + stripped_type = strip_annotated_type(inner_type) + if is_typeddict(stripped_type) and is_mapping(data): + return await _async_transform_typeddict(data, stripped_type) + + if ( + # List[T] + (is_list_type(stripped_type) and is_list(data)) + # Iterable[T] + or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) + ): + inner_type = extract_type_arg(stripped_type, 0) + return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] + + if is_union_type(stripped_type): + # For union types we run the transformation against all subtypes to ensure that everything is transformed. + # + # TODO: there may be edge cases where the same normalized field name will transform to two different names + # in different subtypes. + for subtype in get_args(stripped_type): + data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype) + return data + + if isinstance(data, pydantic.BaseModel): + return model_dump(data, exclude_unset=True) + + annotated_type = _get_annotated_type(annotation) + if annotated_type is None: + return data + + # ignore the first argument as it is the actual type + annotations = get_args(annotated_type)[1:] + for annotation in annotations: + if isinstance(annotation, PropertyInfo) and annotation.format is not None: + return await _async_format_data(data, annotation.format, annotation.format_template) + + return data + + +async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object: + if isinstance(data, date | datetime): + if format_ == "iso8601": + return data.isoformat() + + if format_ == "custom" and format_template is not None: + return data.strftime(format_template) + + if format_ == "base64" and is_base64_file_input(data): + binary: str | bytes | None = None + + if isinstance(data, pathlib.Path): + binary = await anyio.Path(data).read_bytes() + elif isinstance(data, io.IOBase): + binary = data.read() + + if isinstance(binary, str): # type: ignore[unreachable] + binary = binary.encode() + + if not isinstance(binary, bytes): + raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}") + + return base64.b64encode(binary).decode("ascii") + + return data + + +async def _async_transform_typeddict( + data: Mapping[str, object], + expected_type: type, +) -> Mapping[str, object]: + result: dict[str, object] = {} + annotations = get_type_hints(expected_type, include_extras=True) + for key, value in data.items(): + type_ = annotations.get(key) + if type_ is None: + # we do not have a type annotation for this field, leave it as is + result[key] = value + else: + result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_) + return result diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py new file mode 100644 index 0000000000..c7c54dcc37 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_typing.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +from collections import abc as _c_abc +from collections.abc import Iterable +from typing import Annotated, Any, 
TypeVar, cast, get_args, get_origin + +from typing_extensions import Required + +from .._base_compat import is_union as _is_union +from .._base_type import InheritsGeneric + + +def is_annotated_type(typ: type) -> bool: + return get_origin(typ) == Annotated + + +def is_list_type(typ: type) -> bool: + return (get_origin(typ) or typ) == list + + +def is_iterable_type(typ: type) -> bool: + """If the given type is `typing.Iterable[T]`""" + origin = get_origin(typ) or typ + return origin in {Iterable, _c_abc.Iterable} + + +def is_union_type(typ: type) -> bool: + return _is_union(get_origin(typ)) + + +def is_required_type(typ: type) -> bool: + return get_origin(typ) == Required + + +def is_typevar(typ: type) -> bool: + # type ignore is required because type checkers + # think this expression will always return False + return type(typ) == TypeVar # type: ignore + + +# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] +def strip_annotated_type(typ: type) -> type: + if is_required_type(typ) or is_annotated_type(typ): + return strip_annotated_type(cast(type, get_args(typ)[0])) + + return typ + + +def extract_type_arg(typ: type, index: int) -> type: + args = get_args(typ) + try: + return cast(type, args[index]) + except IndexError as err: + raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err + + +def extract_type_var_from_base( + typ: type, + *, + generic_bases: tuple[type, ...], + index: int, + failure_message: str | None = None, +) -> type: + """Given a type like `Foo[T]`, returns the generic type variable `T`. + + This also handles the case where a concrete subclass is given, e.g. + ```py + class MyResponse(Foo[bytes]): + ... + + extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes + ``` + + And where a generic subclass is given: + ```py + _T = TypeVar('_T') + class MyResponse(Foo[_T]): + ... + + extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes + ``` + """ + cls = cast(object, get_origin(typ) or typ) + if cls in generic_bases: + # we're given the class directly + return extract_type_arg(typ, index) + + # if a subclass is given + # --- + # this is needed as __orig_bases__ is not present in the typeshed stubs + # because it is intended to be for internal use only, however there does + # not seem to be a way to resolve generic TypeVars for inherited subclasses + # without using it. + if isinstance(cls, InheritsGeneric): + target_base_class: Any | None = None + for base in cls.__orig_bases__: + if base.__origin__ in generic_bases: + target_base_class = base + break + + if target_base_class is None: + raise RuntimeError( + "Could not find the generic base class;\n" + "This should never happen;\n" + f"Does {cls} inherit from one of {generic_bases} ?" + ) + + extracted = extract_type_arg(target_base_class, index) + if is_typevar(extracted): + # If the extracted type argument is itself a type variable + # then that means the subclass itself is generic, so we have + # to resolve the type argument from the class itself, not + # the base class. + # + # Note: if there is more than 1 type argument, the subclass could + # change the ordering of the type arguments, this is not currently + # supported. 
+ return extract_type_arg(typ, index) + + return extracted + + raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}") diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py new file mode 100644 index 0000000000..ce5e7786aa --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/_utils/_utils.py @@ -0,0 +1,409 @@ +from __future__ import annotations + +import functools +import inspect +import os +import re +from collections.abc import Callable, Iterable, Mapping, Sequence +from pathlib import Path +from typing import ( + Any, + TypeGuard, + TypeVar, + Union, + cast, + overload, +) + +import sniffio + +from .._base_compat import parse_date as parse_date # noqa: PLC0414 +from .._base_compat import parse_datetime as parse_datetime # noqa: PLC0414 +from .._base_type import FileTypes, Headers, HeadersLike, NotGiven, NotGivenOr + + +def remove_notgiven_indict(obj): + if obj is None or (not isinstance(obj, Mapping)): + return obj + return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} + + +_T = TypeVar("_T") +_TupleT = TypeVar("_TupleT", bound=tuple[object, ...]) +_MappingT = TypeVar("_MappingT", bound=Mapping[str, object]) +_SequenceT = TypeVar("_SequenceT", bound=Sequence[object]) +CallableT = TypeVar("CallableT", bound=Callable[..., Any]) + + +def flatten(t: Iterable[Iterable[_T]]) -> list[_T]: + return [item for sublist in t for item in sublist] + + +def extract_files( + # TODO: this needs to take Dict but variance issues..... + # create protocol type ? + query: Mapping[str, object], + *, + paths: Sequence[Sequence[str]], +) -> list[tuple[str, FileTypes]]: + """Recursively extract files from the given dictionary based on specified paths. + + A path may look like this ['foo', 'files', '', 'data']. + + Note: this mutates the given dictionary. + """ + files: list[tuple[str, FileTypes]] = [] + for path in paths: + files.extend(_extract_items(query, path, index=0, flattened_key=None)) + return files + + +def _extract_items( + obj: object, + path: Sequence[str], + *, + index: int, + flattened_key: str | None, +) -> list[tuple[str, FileTypes]]: + try: + key = path[index] + except IndexError: + if isinstance(obj, NotGiven): + # no value was provided - we can safely ignore + return [] + + # cyclical import + from .._files import assert_is_file_content + + # We have exhausted the path, return the entry we found. + assert_is_file_content(obj, key=flattened_key) + assert flattened_key is not None + return [(flattened_key, cast(FileTypes, obj))] + + index += 1 + if is_dict(obj): + try: + # We are at the last entry in the path so we must remove the field + if (len(path)) == index: + item = obj.pop(key) + else: + item = obj[key] + except KeyError: + # Key was not present in the dictionary, this is not indicative of an error + # as the given path may not point to a required field. We also do not want + # to enforce required fields as the API may differ from the spec in some cases. 
+ return [] + if flattened_key is None: + flattened_key = key + else: + flattened_key += f"[{key}]" + return _extract_items( + item, + path, + index=index, + flattened_key=flattened_key, + ) + elif is_list(obj): + if key != "": + return [] + + return flatten( + [ + _extract_items( + item, + path, + index=index, + flattened_key=flattened_key + "[]" if flattened_key is not None else "[]", + ) + for item in obj + ] + ) + + # Something unexpected was passed, just ignore it. + return [] + + +def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]: + return not isinstance(obj, NotGiven) + + +# Type safe methods for narrowing types with TypeVars. +# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown], +# however this cause Pyright to rightfully report errors. As we know we don't +# care about the contained types we can safely use `object` in it's place. +# +# There are two separate functions defined, `is_*` and `is_*_t` for different use cases. +# `is_*` is for when you're dealing with an unknown input +# `is_*_t` is for when you're narrowing a known union type to a specific subset + + +def is_tuple(obj: object) -> TypeGuard[tuple[object, ...]]: + return isinstance(obj, tuple) + + +def is_tuple_t(obj: _TupleT | object) -> TypeGuard[_TupleT]: + return isinstance(obj, tuple) + + +def is_sequence(obj: object) -> TypeGuard[Sequence[object]]: + return isinstance(obj, Sequence) + + +def is_sequence_t(obj: _SequenceT | object) -> TypeGuard[_SequenceT]: + return isinstance(obj, Sequence) + + +def is_mapping(obj: object) -> TypeGuard[Mapping[str, object]]: + return isinstance(obj, Mapping) + + +def is_mapping_t(obj: _MappingT | object) -> TypeGuard[_MappingT]: + return isinstance(obj, Mapping) + + +def is_dict(obj: object) -> TypeGuard[dict[object, object]]: + return isinstance(obj, dict) + + +def is_list(obj: object) -> TypeGuard[list[object]]: + return isinstance(obj, list) + + +def is_iterable(obj: object) -> TypeGuard[Iterable[object]]: + return isinstance(obj, Iterable) + + +def deepcopy_minimal(item: _T) -> _T: + """Minimal reimplementation of copy.deepcopy() that will only copy certain object types: + + - mappings, e.g. `dict` + - list + + This is done for performance reasons. + """ + if is_mapping(item): + return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()}) + if is_list(item): + return cast(_T, [deepcopy_minimal(entry) for entry in item]) + return item + + +# copied from https://github.com/Rapptz/RoboDanny +def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str: + size = len(seq) + if size == 0: + return "" + + if size == 1: + return seq[0] + + if size == 2: + return f"{seq[0]} {final} {seq[1]}" + + return delim.join(seq[:-1]) + f" {final} {seq[-1]}" + + +def quote(string: str) -> str: + """Add single quotation marks around the given string. Does *not* do any escaping.""" + return f"'{string}'" + + +def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: + """Decorator to enforce a given set of arguments or variants of arguments are passed to the decorated function. + + Useful for enforcing runtime validation of overloaded functions. + + Example usage: + ```py + @overload + def foo(*, a: str) -> str: + ... + + + @overload + def foo(*, b: bool) -> str: + ... + + + # This enforces the same constraints that a static type checker would + # i.e. that either a or b must be passed to the function + @required_args(["a"], ["b"]) + def foo(*, a: str | None = None, b: bool | None = None) -> str: + ... 
+ ``` + """ + + def inner(func: CallableT) -> CallableT: + params = inspect.signature(func).parameters + positional = [ + name + for name, param in params.items() + if param.kind + in { + param.POSITIONAL_ONLY, + param.POSITIONAL_OR_KEYWORD, + } + ] + + @functools.wraps(func) + def wrapper(*args: object, **kwargs: object) -> object: + given_params: set[str] = set() + for i, _ in enumerate(args): + try: + given_params.add(positional[i]) + except IndexError: + raise TypeError( + f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given" + ) from None + + given_params.update(kwargs.keys()) + + for variant in variants: + matches = all(param in given_params for param in variant) + if matches: + break + else: # no break + if len(variants) > 1: + variations = human_join( + ["(" + human_join([quote(arg) for arg in variant], final="and") + ")" for variant in variants] + ) + msg = f"Missing required arguments; Expected either {variations} arguments to be given" + else: + # TODO: this error message is not deterministic + missing = list(set(variants[0]) - given_params) + if len(missing) > 1: + msg = f"Missing required arguments: {human_join([quote(arg) for arg in missing])}" + else: + msg = f"Missing required argument: {quote(missing[0])}" + raise TypeError(msg) + return func(*args, **kwargs) + + return wrapper # type: ignore + + return inner + + +_K = TypeVar("_K") +_V = TypeVar("_V") + + +@overload +def strip_not_given(obj: None) -> None: ... + + +@overload +def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ... + + +@overload +def strip_not_given(obj: object) -> object: ... + + +def strip_not_given(obj: object | None) -> object: + """Remove all top-level keys where their values are instances of `NotGiven`""" + if obj is None: + return None + + if not is_mapping(obj): + return obj + + return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)} + + +def coerce_integer(val: str) -> int: + return int(val, base=10) + + +def coerce_float(val: str) -> float: + return float(val) + + +def coerce_boolean(val: str) -> bool: + return val in {"true", "1", "on"} + + +def maybe_coerce_integer(val: str | None) -> int | None: + if val is None: + return None + return coerce_integer(val) + + +def maybe_coerce_float(val: str | None) -> float | None: + if val is None: + return None + return coerce_float(val) + + +def maybe_coerce_boolean(val: str | None) -> bool | None: + if val is None: + return None + return coerce_boolean(val) + + +def removeprefix(string: str, prefix: str) -> str: + """Remove a prefix from a string. + + Backport of `str.removeprefix` for Python < 3.9 + """ + if string.startswith(prefix): + return string[len(prefix) :] + return string + + +def removesuffix(string: str, suffix: str) -> str: + """Remove a suffix from a string. 
+ + Backport of `str.removesuffix` for Python < 3.9 + """ + if string.endswith(suffix): + return string[: -len(suffix)] + return string + + +def file_from_path(path: str) -> FileTypes: + contents = Path(path).read_bytes() + file_name = os.path.basename(path) + return (file_name, contents) + + +def get_required_header(headers: HeadersLike, header: str) -> str: + lower_header = header.lower() + if isinstance(headers, Mapping): + headers = cast(Headers, headers) + for k, v in headers.items(): + if k.lower() == lower_header and isinstance(v, str): + return v + + """ to deal with the case where the header looks like Stainless-Event-Id """ + intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize()) + + for normalized_header in [header, lower_header, header.upper(), intercaps_header]: + value = headers.get(normalized_header) + if value: + return value + + raise ValueError(f"Could not find {header} header") + + +def get_async_library() -> str: + try: + return sniffio.current_async_library() + except Exception: + return "false" + + +def drop_prefix_image_data(content: Union[str, list[dict]]) -> Union[str, list[dict]]: + """ + 删除 ;base64, 前缀 + :param image_data: + :return: + """ + if isinstance(content, list): + for data in content: + if data.get("type") == "image_url": + image_data = data.get("image_url").get("url") + if image_data.startswith("data:image/"): + image_data = image_data.split("base64,")[-1] + data["image_url"]["url"] = image_data + + return content diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py new file mode 100644 index 0000000000..e5fce94c00 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/logs.py @@ -0,0 +1,78 @@ +import logging +import os +import time + +logger = logging.getLogger(__name__) + + +class LoggerNameFilter(logging.Filter): + def filter(self, record): + # return record.name.startswith("loom_core") or record.name in "ERROR" or ( + # record.name.startswith("uvicorn.error") + # and record.getMessage().startswith("Uvicorn running on") + # ) + return True + + +def get_log_file(log_path: str, sub_dir: str): + """ + sub_dir should contain a timestamp. + """ + log_dir = os.path.join(log_path, sub_dir) + # Here should be creating a new directory each time, so `exist_ok=False` + os.makedirs(log_dir, exist_ok=False) + return os.path.join(log_dir, "zhipuai.log") + + +def get_config_dict(log_level: str, log_file_path: str, log_backup_count: int, log_max_bytes: int) -> dict: + # for windows, the path should be a raw string. 
+ log_file_path = log_file_path.encode("unicode-escape").decode() if os.name == "nt" else log_file_path + log_level = log_level.upper() + config_dict = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "formatter": {"format": ("%(asctime)s %(name)-12s %(process)d %(levelname)-8s %(message)s")}, + }, + "filters": { + "logger_name_filter": { + "()": __name__ + ".LoggerNameFilter", + }, + }, + "handlers": { + "stream_handler": { + "class": "logging.StreamHandler", + "formatter": "formatter", + "level": log_level, + # "stream": "ext://sys.stdout", + # "filters": ["logger_name_filter"], + }, + "file_handler": { + "class": "logging.handlers.RotatingFileHandler", + "formatter": "formatter", + "level": log_level, + "filename": log_file_path, + "mode": "a", + "maxBytes": log_max_bytes, + "backupCount": log_backup_count, + "encoding": "utf8", + }, + }, + "loggers": { + "loom_core": { + "handlers": ["stream_handler", "file_handler"], + "level": log_level, + "propagate": False, + } + }, + "root": { + "level": log_level, + "handlers": ["stream_handler", "file_handler"], + }, + } + return config_dict + + +def get_timestamp_ms(): + t = time.time() + return int(round(t * 1000)) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py new file mode 100644 index 0000000000..7f0b1b91d9 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/core/pagination.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Any, Generic, Optional, TypeVar, cast + +from typing_extensions import Protocol, override, runtime_checkable + +from ._http_client import BasePage, BaseSyncPage, PageInfo + +__all__ = ["SyncPage", "SyncCursorPage"] + +_T = TypeVar("_T") + + +@runtime_checkable +class CursorPageItem(Protocol): + id: Optional[str] + + +class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + """Note: no pagination actually occurs yet, this is for forwards-compatibility.""" + + data: list[_T] + object: str + + @override + def _get_page_items(self) -> list[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> None: + """ + This page represents a response that isn't actually paginated at the API level + so there will never be a next page. 
+ """ + return None + + +class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): + data: list[_T] + + @override + def _get_page_items(self) -> list[_T]: + data = self.data + if not data: + return [] + return data + + @override + def next_page_info(self) -> Optional[PageInfo]: + data = self.data + if not data: + return None + + item = cast(Any, data[-1]) + if not isinstance(item, CursorPageItem) or item.id is None: + # TODO emit warning log + return None + + return PageInfo(params={"after": item.id}) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py new file mode 100644 index 0000000000..9f941fb91c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/__init__.py @@ -0,0 +1,5 @@ +from .assistant_completion import AssistantCompletion + +__all__ = [ + "AssistantCompletion", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py new file mode 100644 index 0000000000..cbfb6edaeb --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_completion.py @@ -0,0 +1,40 @@ +from typing import Any, Optional + +from ...core import BaseModel +from .message import MessageContent + +__all__ = ["AssistantCompletion", "CompletionUsage"] + + +class ErrorInfo(BaseModel): + code: str # 错误码 + message: str # 错误信息 + + +class AssistantChoice(BaseModel): + index: int # 结果下标 + delta: MessageContent # 当前会话输出消息体 + finish_reason: str + """ + # 推理结束原因 stop代表推理自然结束或触发停止词。 sensitive 代表模型推理内容被安全审核接口拦截。请注意,针对此类内容,请用户自行判断并决定是否撤回已公开的内容。 + # network_error 代表模型推理服务异常。 + """ # noqa: E501 + metadata: dict # 元信息,拓展字段 + + +class CompletionUsage(BaseModel): + prompt_tokens: int # 输入的 tokens 数量 + completion_tokens: int # 输出的 tokens 数量 + total_tokens: int # 总 tokens 数量 + + +class AssistantCompletion(BaseModel): + id: str # 请求 ID + conversation_id: str # 会话 ID + assistant_id: str # 智能体 ID + created: int # 请求创建时间,Unix 时间戳 + status: str # 返回状态,包括:`completed` 表示生成结束`in_progress`表示生成中 `failed` 表示生成异常 + last_error: Optional[ErrorInfo] # 异常信息 + choices: list[AssistantChoice] # 增量返回的信息 + metadata: Optional[dict[str, Any]] # 元信息,拓展字段 + usage: Optional[CompletionUsage] # tokens 数量统计 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py new file mode 100644 index 0000000000..03f14f4238 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_params.py @@ -0,0 +1,7 @@ +from typing import TypedDict + + +class ConversationParameters(TypedDict, total=False): + assistant_id: str # 智能体 ID + page: int # 当前分页 + page_size: int # 分页数量 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py new file mode 100644 index 0000000000..d1833d220a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_conversation_resp.py @@ -0,0 +1,29 @@ +from ...core import BaseModel + +__all__ = ["ConversationUsageListResp"] + + +class 
Usage(BaseModel): + prompt_tokens: int # 用户输入的 tokens 数量 + completion_tokens: int # 模型输入的 tokens 数量 + total_tokens: int # 总 tokens 数量 + + +class ConversationUsage(BaseModel): + id: str # 会话 id + assistant_id: str # 智能体Assistant id + create_time: int # 创建时间 + update_time: int # 更新时间 + usage: Usage # 会话中 tokens 数量统计 + + +class ConversationUsageList(BaseModel): + assistant_id: str # 智能体id + has_more: bool # 是否还有更多页 + conversation_list: list[ConversationUsage] # 返回的 + + +class ConversationUsageListResp(BaseModel): + code: int + msg: str + data: ConversationUsageList diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py new file mode 100644 index 0000000000..2def1025cd --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_create_params.py @@ -0,0 +1,32 @@ +from typing import Optional, TypedDict, Union + + +class AssistantAttachments: + file_id: str + + +class MessageTextContent: + type: str # 目前支持 type = text + text: str + + +MessageContent = Union[MessageTextContent] + + +class ConversationMessage(TypedDict): + """会话消息体""" + + role: str # 用户的输入角色,例如 'user' + content: list[MessageContent] # 会话消息体的内容 + + +class AssistantParameters(TypedDict, total=False): + """智能体参数类""" + + assistant_id: str # 智能体 ID + conversation_id: Optional[str] # 会话 ID,不传则创建新会话 + model: str # 模型名称,默认为 'GLM-4-Assistant' + stream: bool # 是否支持流式 SSE,需要传入 True + messages: list[ConversationMessage] # 会话消息体 + attachments: Optional[list[AssistantAttachments]] # 会话指定的文件,非必填 + metadata: Optional[dict] # 元信息,拓展字段,非必填 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py new file mode 100644 index 0000000000..0709cdbcad --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/assistant_support_resp.py @@ -0,0 +1,21 @@ +from ...core import BaseModel + +__all__ = ["AssistantSupportResp"] + + +class AssistantSupport(BaseModel): + assistant_id: str # 智能体的 Assistant id,用于智能体会话 + created_at: int # 创建时间 + updated_at: int # 更新时间 + name: str # 智能体名称 + avatar: str # 智能体头像 + description: str # 智能体描述 + status: str # 智能体状态,目前只有 publish + tools: list[str] # 智能体支持的工具名 + starter_prompts: list[str] # 智能体启动推荐的 prompt + + +class AssistantSupportResp(BaseModel): + code: int + msg: str + data: list[AssistantSupport] # 智能体列表 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py new file mode 100644 index 0000000000..562e0151e5 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/__init__.py @@ -0,0 +1,3 @@ +from .message_content import MessageContent + +__all__ = ["MessageContent"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py new file mode 100644 index 0000000000..6a1a438a6f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/message_content.py @@ -0,0 +1,13 @@ +from typing import Annotated, TypeAlias, Union + +from 
....core._utils import PropertyInfo +from .text_content_block import TextContentBlock +from .tools_delta_block import ToolsDeltaBlock + +__all__ = ["MessageContent"] + + +MessageContent: TypeAlias = Annotated[ + Union[ToolsDeltaBlock, TextContentBlock], + PropertyInfo(discriminator="type"), +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py new file mode 100644 index 0000000000..865fb1139e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/text_content_block.py @@ -0,0 +1,14 @@ +from typing import Literal + +from ....core import BaseModel + +__all__ = ["TextContentBlock"] + + +class TextContentBlock(BaseModel): + content: str + + role: str = "assistant" + + type: Literal["content"] = "content" + """Always `content`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py new file mode 100644 index 0000000000..9d569b282e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/code_interpreter_delta_block.py @@ -0,0 +1,27 @@ +from typing import Literal + +__all__ = ["CodeInterpreterToolBlock"] + +from .....core import BaseModel + + +class CodeInterpreterToolOutput(BaseModel): + """代码工具输出结果""" + + type: str # 代码执行日志,目前只有 logs + logs: str # 代码执行的日志结果 + error_msg: str # 错误信息 + + +class CodeInterpreter(BaseModel): + """代码解释器""" + + input: str # 生成的代码片段,输入给代码沙盒 + outputs: list[CodeInterpreterToolOutput] # 代码执行后的输出结果 + + +class CodeInterpreterToolBlock(BaseModel): + """代码工具块""" + + code_interpreter: CodeInterpreter # 代码解释器对象 + type: Literal["code_interpreter"] # 调用工具的类型,始终为 `code_interpreter` diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py new file mode 100644 index 0000000000..0b6895556b --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/drawing_tool_delta_block.py @@ -0,0 +1,21 @@ +from typing import Literal + +from .....core import BaseModel + +__all__ = ["DrawingToolBlock"] + + +class DrawingToolOutput(BaseModel): + image: str + + +class DrawingTool(BaseModel): + input: str + outputs: list[DrawingToolOutput] + + +class DrawingToolBlock(BaseModel): + drawing_tool: DrawingTool + + type: Literal["drawing_tool"] + """Always `drawing_tool`.""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py new file mode 100644 index 0000000000..c439bc4b3f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/function_delta_block.py @@ -0,0 +1,22 @@ +from typing import Literal, Union + +__all__ = ["FunctionToolBlock"] + +from .....core import BaseModel + + +class FunctionToolOutput(BaseModel): + content: str + + +class FunctionTool(BaseModel): + name: str + arguments: Union[str, dict] + outputs: list[FunctionToolOutput] + + +class 
FunctionToolBlock(BaseModel):
+    function: FunctionTool
+
+    type: Literal["function"]
+    """Always `function`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py
new file mode 100644
index 0000000000..4789e9378a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/retrieval_delta_black.py
@@ -0,0 +1,41 @@
+from typing import Literal
+
+from .....core import BaseModel
+
+
+class RetrievalToolOutput(BaseModel):
+    """
+    This class represents the output of a retrieval tool.
+
+    Attributes:
+    - text (str): The text snippet retrieved from the knowledge base.
+    - document (str): The name of the document from which the text snippet was retrieved, returned only in intelligent configuration.
+    """  # noqa: E501
+
+    text: str
+    document: str
+
+
+class RetrievalTool(BaseModel):
+    """
+    This class represents the outputs of a retrieval tool.
+
+    Attributes:
+    - outputs (List[RetrievalToolOutput]): A list of text snippets and their respective document names retrieved from the knowledge base.
+    """  # noqa: E501
+
+    outputs: list[RetrievalToolOutput]
+
+
+class RetrievalToolBlock(BaseModel):
+    """
+    This class represents a block for invoking the retrieval tool.
+
+    Attributes:
+    - retrieval (RetrievalTool): An instance of the RetrievalTool class containing the retrieval outputs.
+    - type (Literal["retrieval"]): The type of tool being used, always set to "retrieval".
+    """
+
+    retrieval: RetrievalTool
+    type: Literal["retrieval"]
+    """Always `retrieval`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py
new file mode 100644
index 0000000000..98544053d4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/tools_type.py
@@ -0,0 +1,16 @@
+from typing import Annotated, TypeAlias, Union
+
+from .....core._utils import PropertyInfo
+from .code_interpreter_delta_block import CodeInterpreterToolBlock
+from .drawing_tool_delta_block import DrawingToolBlock
+from .function_delta_block import FunctionToolBlock
+from .retrieval_delta_black import RetrievalToolBlock
+from .web_browser_delta_block import WebBrowserToolBlock
+
+__all__ = ["ToolsType"]
+
+
+ToolsType: TypeAlias = Annotated[
+    Union[DrawingToolBlock, CodeInterpreterToolBlock, WebBrowserToolBlock, RetrievalToolBlock, FunctionToolBlock],
+    PropertyInfo(discriminator="type"),
+]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py
new file mode 100644
index 0000000000..966e6fe0c8
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools/web_browser_delta_block.py
@@ -0,0 +1,48 @@
+from typing import Literal
+
+from .....core import BaseModel
+
+__all__ = ["WebBrowserToolBlock"]
+
+
+class WebBrowserOutput(BaseModel):
+    """
+    This class represents the output of a web browser search result.
+
+    Attributes:
+    - title (str): The title of the search result.
+    - link (str): The URL link to the search result's webpage.
+    - content (str): The textual content extracted from the search result.
+    - error_msg (str): Any error message encountered during the search or retrieval process.
+    """
+
+    title: str
+    link: str
+    content: str
+    error_msg: str
+
+
+class WebBrowser(BaseModel):
+    """
+    This class represents the input and outputs of a web browser search.
+
+    Attributes:
+    - input (str): The input query for the web browser search.
+    - outputs (List[WebBrowserOutput]): A list of search results returned by the web browser.
+    """
+
+    input: str
+    outputs: list[WebBrowserOutput]
+
+
+class WebBrowserToolBlock(BaseModel):
+    """
+    This class represents a block for invoking the web browser tool.
+
+    Attributes:
+    - web_browser (WebBrowser): An instance of the WebBrowser class containing the search input and outputs.
+    - type (Literal["web_browser"]): The type of tool being used, always set to "web_browser".
+    """
+
+    web_browser: WebBrowser
+    type: Literal["web_browser"]
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py
new file mode 100644
index 0000000000..781a1ab819
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/assistant/message/tools_delta_block.py
@@ -0,0 +1,16 @@
+from typing import Literal
+
+from ....core import BaseModel
+from .tools.tools_type import ToolsType
+
+__all__ = ["ToolsDeltaBlock"]
+
+
+class ToolsDeltaBlock(BaseModel):
+    tool_calls: list[ToolsType]
+    """The list of tool call blocks carried by this message."""
+
+    role: str = "tool"
+
+    type: Literal["tool_calls"] = "tool_calls"
+    """Always `tool_calls`."""
diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py
new file mode 100644
index 0000000000..560562915c
--- /dev/null
+++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch.py
@@ -0,0 +1,82 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +import builtins +from typing import Literal, Optional + +from ..core import BaseModel +from .batch_error import BatchError +from .batch_request_counts import BatchRequestCounts + +__all__ = ["Batch", "Errors"] + + +class Errors(BaseModel): + data: Optional[list[BatchError]] = None + + object: Optional[str] = None + """这个类型,一直是`list`。""" + + +class Batch(BaseModel): + id: str + + completion_window: str + """用于执行请求的地址信息。""" + + created_at: int + """这是 Unix timestamp (in seconds) 表示的创建时间。""" + + endpoint: str + """这是ZhipuAI endpoint的地址。""" + + input_file_id: str + """标记为batch的输入文件的ID。""" + + object: Literal["batch"] + """这个类型,一直是`batch`.""" + + status: Literal[ + "validating", "failed", "in_progress", "finalizing", "completed", "expired", "cancelling", "cancelled" + ] + """batch 的状态。""" + + cancelled_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的取消时间。""" + + cancelling_at: Optional[int] = None + """Unix timestamp (in seconds) 表示发起取消的请求时间 """ + + completed_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的完成时间。""" + + error_file_id: Optional[str] = None + """这个文件id包含了执行请求失败的请求的输出。""" + + errors: Optional[Errors] = None + + expired_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的将在过期时间。""" + + expires_at: Optional[int] = None + """Unix timestamp (in seconds) 触发过期""" + + failed_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的失败时间。""" + + finalizing_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的最终时间。""" + + in_progress_at: Optional[int] = None + """Unix timestamp (in seconds) 表示的开始处理时间。""" + + metadata: Optional[builtins.object] = None + """ + key:value形式的元数据,以便将信息存储 + 结构化格式。键的长度是64个字符,值最长512个字符 + """ + + output_file_id: Optional[str] = None + """完成请求的输出文件的ID。""" + + request_counts: Optional[BatchRequestCounts] = None + """批次中不同状态的请求计数""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py new file mode 100644 index 0000000000..3dae65ea46 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_create_params.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from typing_extensions import Required, TypedDict + +__all__ = ["BatchCreateParams"] + + +class BatchCreateParams(TypedDict, total=False): + completion_window: Required[str] + """The time frame within which the batch should be processed. + + Currently only `24h` is supported. + """ + + endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings"]] + """The endpoint to be used for all requests in the batch. + + Currently `/v1/chat/completions` and `/v1/embeddings` are supported. + """ + + input_file_id: Required[str] + """The ID of an uploaded file that contains requests for the new batch. + + See [upload file](https://platform.openai.com/docs/api-reference/files/create) + for how to upload a file. + + Your input file must be formatted as a + [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + and must be uploaded with the purpose `batch`. 
+ """ + + metadata: Optional[dict[str, str]] + """Optional custom metadata for the batch.""" + + auto_delete_input_file: Optional[bool] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py new file mode 100644 index 0000000000..f934db1978 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_error.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..core import BaseModel + +__all__ = ["BatchError"] + + +class BatchError(BaseModel): + code: Optional[str] = None + """定义的业务错误码""" + + line: Optional[int] = None + """文件中的行号""" + + message: Optional[str] = None + """关于对话文件中的错误的描述""" + + param: Optional[str] = None + """参数名称,如果有的话""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py new file mode 100644 index 0000000000..1a68167132 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_list_params.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["BatchListParams"] + + +class BatchListParams(TypedDict, total=False): + after: str + """分页的游标,用于获取下一页的数据。 + + `after` 是一个指向当前页面的游标,用于获取下一页的数据。如果没有提供 `after`,则返回第一页的数据。 + list. + """ + + limit: int + """这个参数用于限制返回的结果数量。 + + Limit 用于限制返回的结果数量。默认值为 10 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py new file mode 100644 index 0000000000..ca3ccae625 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/batch_request_counts.py @@ -0,0 +1,14 @@ +from ..core import BaseModel + +__all__ = ["BatchRequestCounts"] + + +class BatchRequestCounts(BaseModel): + completed: int + """这个数字表示已经完成的请求。""" + + failed: int + """这个数字表示失败的请求。""" + + total: int + """这个数字表示总的请求。""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py index a0645b0916..c1eed070f3 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/async_chat_completion.py @@ -1,10 +1,9 @@ from typing import Optional -from pydantic import BaseModel - +from ...core import BaseModel from .chat_completion import CompletionChoice, CompletionUsage -__all__ = ["AsyncTaskStatus"] +__all__ = ["AsyncTaskStatus", "AsyncCompletion"] class AsyncTaskStatus(BaseModel): diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py index 4b3a929a2b..1945a826cd 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion.py @@ -1,6 +1,6 @@ from typing import Optional -from pydantic import BaseModel +from ...core import BaseModel __all__ = ["Completion", "CompletionUsage"] diff --git 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py index c250699741..27fad0008a 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/chat_completion_chunk.py @@ -1,8 +1,9 @@ -from typing import Optional +from typing import Any, Optional -from pydantic import BaseModel +from ...core import BaseModel __all__ = [ + "CompletionUsage", "ChatCompletionChunk", "Choice", "ChoiceDelta", @@ -53,3 +54,4 @@ class ChatCompletionChunk(BaseModel): created: Optional[int] = None model: Optional[str] = None usage: Optional[CompletionUsage] = None + extra_json: dict[str, Any] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py new file mode 100644 index 0000000000..666b38855c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/chat/code_geex/code_geex_params.py @@ -0,0 +1,146 @@ +from typing import Literal, Optional + +from typing_extensions import Required, TypedDict + +__all__ = [ + "CodeGeexTarget", + "CodeGeexContext", + "CodeGeexExtra", +] + + +class CodeGeexTarget(TypedDict, total=False): + """补全的内容参数""" + + path: Optional[str] + """文件路径""" + language: Required[ + Literal[ + "c", + "c++", + "cpp", + "c#", + "csharp", + "c-sharp", + "css", + "cuda", + "dart", + "lua", + "objectivec", + "objective-c", + "objective-c++", + "python", + "perl", + "prolog", + "swift", + "lisp", + "java", + "scala", + "tex", + "jsx", + "tsx", + "vue", + "markdown", + "html", + "php", + "js", + "javascript", + "typescript", + "go", + "shell", + "rust", + "sql", + "kotlin", + "vb", + "ruby", + "pascal", + "r", + "fortran", + "lean", + "matlab", + "delphi", + "scheme", + "basic", + "assembly", + "groovy", + "abap", + "gdscript", + "haskell", + "julia", + "elixir", + "excel", + "clojure", + "actionscript", + "solidity", + "powershell", + "erlang", + "cobol", + "alloy", + "awk", + "thrift", + "sparql", + "augeas", + "cmake", + "f-sharp", + "stan", + "isabelle", + "dockerfile", + "rmarkdown", + "literate-agda", + "tcl", + "glsl", + "antlr", + "verilog", + "racket", + "standard-ml", + "elm", + "yaml", + "smalltalk", + "ocaml", + "idris", + "visual-basic", + "protocol-buffer", + "bluespec", + "applescript", + "makefile", + "tcsh", + "maple", + "systemverilog", + "literate-coffeescript", + "vhdl", + "restructuredtext", + "sas", + "literate-haskell", + "java-server-pages", + "coffeescript", + "emacs-lisp", + "mathematica", + "xslt", + "zig", + "common-lisp", + "stata", + "agda", + "ada", + ] + ] + """代码语言类型,如python""" + code_prefix: Required[str] + """补全位置的前文""" + code_suffix: Required[str] + """补全位置的后文""" + + +class CodeGeexContext(TypedDict, total=False): + """附加代码""" + + path: Required[str] + """附加代码文件的路径""" + code: Required[str] + """附加的代码内容""" + + +class CodeGeexExtra(TypedDict, total=False): + target: Required[CodeGeexTarget] + """补全的内容参数""" + contexts: Optional[list[CodeGeexContext]] + """附加代码""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py index e01f2c815f..8425b5c866 100644 --- 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/embeddings.py @@ -2,8 +2,7 @@ from __future__ import annotations from typing import Optional -from pydantic import BaseModel - +from ..core import BaseModel from .chat.chat_completion import CompletionUsage __all__ = ["Embedding", "EmbeddingsResponded"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py new file mode 100644 index 0000000000..bbaf59e4d7 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/__init__.py @@ -0,0 +1,5 @@ +from .file_deleted import FileDeleted +from .file_object import FileObject, ListOfFileObject +from .upload_detail import UploadDetail + +__all__ = ["FileObject", "ListOfFileObject", "UploadDetail", "FileDeleted"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py new file mode 100644 index 0000000000..4ef93b1c05 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_create_params.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from typing_extensions import Required, TypedDict + +__all__ = ["FileCreateParams"] + +from ...core import FileTypes +from . import UploadDetail + + +class FileCreateParams(TypedDict, total=False): + file: FileTypes + """file和 upload_detail二选一必填""" + + upload_detail: list[UploadDetail] + """file和 upload_detail二选一必填""" + + purpose: Required[Literal["fine-tune", "retrieval", "batch"]] + """ + 上传文件的用途,支持 "fine-tune和 "retrieval" + retrieval支持上传Doc、Docx、PDF、Xlsx、URL类型文件,且单个文件的大小不超过 5MB。 + fine-tune支持上传.jsonl文件且当前单个文件的大小最大可为 100 MB ,文件中语料格式需满足微调指南中所描述的格式。 + """ + custom_separator: Optional[list[str]] + """ + 当 purpose 为 retrieval 且文件类型为 pdf, url, docx 时上传,切片规则默认为 `\n`。 + """ + knowledge_id: str + """ + 当文件上传目的为 retrieval 时,需要指定知识库ID进行上传。 + """ + + sentence_size: int + """ + 当文件上传目的为 retrieval 时,需要指定知识库ID进行上传。 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py new file mode 100644 index 0000000000..a384b1a69a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_deleted.py @@ -0,0 +1,13 @@ +from typing import Literal + +from ...core import BaseModel + +__all__ = ["FileDeleted"] + + +class FileDeleted(BaseModel): + id: str + + deleted: bool + + object: Literal["file"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py similarity index 86% rename from api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py rename to api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py index 75f76fe969..8f9d0fbb8e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/file_object.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/file_object.py @@ -1,8 +1,8 @@ from typing import Optional -from pydantic import BaseModel +from ...core import BaseModel -__all__ = ["FileObject"] +__all__ = 
["FileObject", "ListOfFileObject"] class FileObject(BaseModel): diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py new file mode 100644 index 0000000000..8f1ca5ce57 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/files/upload_detail.py @@ -0,0 +1,13 @@ +from typing import Optional + +from ...core import BaseModel + + +class UploadDetail(BaseModel): + url: str + knowledge_type: int + file_name: Optional[str] = None + sentence_size: Optional[int] = None + custom_separator: Optional[list[str]] = None + callback_url: Optional[str] = None + callback_header: Optional[dict[str, str]] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py index 1d3930286b..75c7553dbe 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job.py @@ -1,6 +1,6 @@ from typing import Optional, Union -from pydantic import BaseModel +from ...core import BaseModel __all__ = ["FineTuningJob", "Error", "Hyperparameters", "ListOfFineTuningJob"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py index e26b448534..f996cff114 100644 --- a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/fine_tuning_job_event.py @@ -1,6 +1,6 @@ from typing import Optional, Union -from pydantic import BaseModel +from ...core import BaseModel __all__ = ["FineTuningJobEvent", "Metric", "JobEvent"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py new file mode 100644 index 0000000000..57d0d2511d --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/__init__.py @@ -0,0 +1 @@ +from .fine_tuned_models import FineTunedModelsStatus diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py new file mode 100644 index 0000000000..b286a5b577 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/fine_tuning/models/fine_tuned_models.py @@ -0,0 +1,13 @@ +from typing import ClassVar + +from ....core import PYDANTIC_V2, BaseModel, ConfigDict + +__all__ = ["FineTunedModelsStatus"] + + +class FineTunedModelsStatus(BaseModel): + if PYDANTIC_V2: + model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow", protected_namespaces=()) + request_id: str # 请求id + model_name: str # 模型名称 + delete_status: str # 删除状态 deleting(删除中), deleted (已删除) diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py index b352ce0954..3bcad0acab 100644 --- 
a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/image.py @@ -2,7 +2,7 @@ from __future__ import annotations from typing import Optional -from pydantic import BaseModel +from ..core import BaseModel __all__ = ["GeneratedImage", "ImagesResponded"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py new file mode 100644 index 0000000000..8c81d703e2 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/__init__.py @@ -0,0 +1,8 @@ +from .knowledge import KnowledgeInfo +from .knowledge_used import KnowledgeStatistics, KnowledgeUsed + +__all__ = [ + "KnowledgeInfo", + "KnowledgeStatistics", + "KnowledgeUsed", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py new file mode 100644 index 0000000000..32e23e6dab --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/__init__.py @@ -0,0 +1,8 @@ +from .document import DocumentData, DocumentFailedInfo, DocumentObject, DocumentSuccessinfo + +__all__ = [ + "DocumentData", + "DocumentObject", + "DocumentSuccessinfo", + "DocumentFailedInfo", +] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py new file mode 100644 index 0000000000..b9a1646391 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document.py @@ -0,0 +1,51 @@ +from typing import Optional + +from ....core import BaseModel + +__all__ = ["DocumentData", "DocumentObject", "DocumentSuccessinfo", "DocumentFailedInfo"] + + +class DocumentSuccessinfo(BaseModel): + documentId: Optional[str] = None + """文件id""" + filename: Optional[str] = None + """文件名称""" + + +class DocumentFailedInfo(BaseModel): + failReason: Optional[str] = None + """上传失败的原因,包括:文件格式不支持、文件大小超出限制、知识库容量已满、容量上限为 50 万字。""" + filename: Optional[str] = None + """文件名称""" + documentId: Optional[str] = None + """知识库id""" + + +class DocumentObject(BaseModel): + """文档信息""" + + successInfos: Optional[list[DocumentSuccessinfo]] = None + """上传成功的文件信息""" + failedInfos: Optional[list[DocumentFailedInfo]] = None + """上传失败的文件信息""" + + +class DocumentDataFailInfo(BaseModel): + """失败原因""" + + embedding_code: Optional[int] = ( + None # 失败码 10001:知识不可用,知识库空间已达上限 10002:知识不可用,知识库空间已达上限(字数超出限制) + ) + embedding_msg: Optional[str] = None # 失败原因 + + +class DocumentData(BaseModel): + id: str = None # 知识唯一id + custom_separator: list[str] = None # 切片规则 + sentence_size: str = None # 切片大小 + length: int = None # 文件大小(字节) + word_num: int = None # 文件字数 + name: str = None # 文件名 + url: str = None # 文件下载链接 + embedding_stat: int = None # 0:向量化中 1:向量化完成 2:向量化失败 + failInfo: Optional[DocumentDataFailInfo] = None # 失败原因 向量化失败embedding_stat=2的时候 会有此值 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py new file mode 100644 index 0000000000..509cb3a451 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_edit_params.py @@ -0,0 +1,29 @@ +from typing import Optional, TypedDict + +__all__ = ["DocumentEditParams"] + + +class DocumentEditParams(TypedDict): + """ + 知识参数类型定义 + + Attributes: + id (str): 知识ID + knowledge_type (int): 知识类型: + 1:文章知识: 支持pdf,url,docx + 2.问答知识-文档: 支持pdf,url,docx + 3.问答知识-表格: 支持xlsx + 4.商品库-表格: 支持xlsx + 5.自定义: 支持pdf,url,docx + custom_separator (Optional[List[str]]): 当前知识类型为自定义(knowledge_type=5)时的切片规则,默认\n + sentence_size (Optional[int]): 当前知识类型为自定义(knowledge_type=5)时的切片字数,取值范围: 20-2000,默认300 + callback_url (Optional[str]): 回调地址 + callback_header (Optional[dict]): 回调时携带的header + """ + + id: str + knowledge_type: int + custom_separator: Optional[list[str]] + sentence_size: Optional[int] + callback_url: Optional[str] + callback_header: Optional[dict[str, str]] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py new file mode 100644 index 0000000000..910c8c045e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_params.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from typing import Optional + +from typing_extensions import TypedDict + + +class DocumentListParams(TypedDict, total=False): + """ + 文件查询参数类型定义 + + Attributes: + purpose (Optional[str]): 文件用途 + knowledge_id (Optional[str]): 当文件用途为 retrieval 时,需要提供查询的知识库ID + page (Optional[int]): 页,默认1 + limit (Optional[int]): 查询文件列表数,默认10 + after (Optional[str]): 查询指定fileID之后的文件列表(当文件用途为 fine-tune 时需要) + order (Optional[str]): 排序规则,可选值['desc', 'asc'],默认desc(当文件用途为 fine-tune 时需要) + """ + + purpose: Optional[str] + knowledge_id: Optional[str] + page: Optional[int] + limit: Optional[int] + after: Optional[str] + order: Optional[str] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py new file mode 100644 index 0000000000..acae4fad9f --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/document/document_list_resp.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from ....core import BaseModel +from . 
import DocumentData + +__all__ = ["DocumentPage"] + + +class DocumentPage(BaseModel): + list: list[DocumentData] + object: str diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py new file mode 100644 index 0000000000..bc6f159eb2 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge.py @@ -0,0 +1,21 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = ["KnowledgeInfo"] + + +class KnowledgeInfo(BaseModel): + id: Optional[str] = None + """知识库唯一 id""" + embedding_id: Optional[str] = ( + None # 知识库绑定的向量化模型 见模型列表 [内部服务开放接口文档](https://lslfd0slxc.feishu.cn/docx/YauWdbBiMopV0FxB7KncPWCEn8f#H15NduiQZo3ugmxnWQFcfAHpnQ4) + ) + name: Optional[str] = None # 知识库名称 100字限制 + customer_identifier: Optional[str] = None # 用户标识 长度32位以内 + description: Optional[str] = None # 知识库描述 500字限制 + background: Optional[str] = None # 背景颜色(给枚举)'blue', 'red', 'orange', 'purple', 'sky' + icon: Optional[str] = ( + None # 知识库图标(给枚举) question: 问号、book: 书籍、seal: 印章、wrench: 扳手、tag: 标签、horn: 喇叭、house: 房子 # noqa: E501 + ) + bucket_id: Optional[str] = None # 桶id 限制32位 diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py new file mode 100644 index 0000000000..c3da201727 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_create_params.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import Literal, Optional + +from typing_extensions import TypedDict + +__all__ = ["KnowledgeBaseParams"] + + +class KnowledgeBaseParams(TypedDict): + """ + 知识库参数类型定义 + + Attributes: + embedding_id (int): 知识库绑定的向量化模型ID + name (str): 知识库名称,限制100字 + customer_identifier (Optional[str]): 用户标识,长度32位以内 + description (Optional[str]): 知识库描述,限制500字 + background (Optional[Literal['blue', 'red', 'orange', 'purple', 'sky']]): 背景颜色 + icon (Optional[Literal['question', 'book', 'seal', 'wrench', 'tag', 'horn', 'house']]): 知识库图标 + bucket_id (Optional[str]): 桶ID,限制32位 + """ + + embedding_id: int + name: str + customer_identifier: Optional[str] + description: Optional[str] + background: Optional[Literal["blue", "red", "orange", "purple", "sky"]] = None + icon: Optional[Literal["question", "book", "seal", "wrench", "tag", "horn", "house"]] = None + bucket_id: Optional[str] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py new file mode 100644 index 0000000000..a221b28e46 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_params.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["KnowledgeListParams"] + + +class KnowledgeListParams(TypedDict, total=False): + page: int = 1 + """ 页码,默认 1,第一页 + """ + + size: int = 10 + """每页数量 默认10 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py new file mode 100644 index 0000000000..e462eddc55 --- /dev/null +++ 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_list_resp.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from ...core import BaseModel +from . import KnowledgeInfo + +__all__ = ["KnowledgePage"] + + +class KnowledgePage(BaseModel): + list: list[KnowledgeInfo] + object: str diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py new file mode 100644 index 0000000000..cfda709702 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/knowledge/knowledge_used.py @@ -0,0 +1,21 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = ["KnowledgeStatistics", "KnowledgeUsed"] + + +class KnowledgeStatistics(BaseModel): + """ + 使用量统计 + """ + + word_num: Optional[int] = None + length: Optional[int] = None + + +class KnowledgeUsed(BaseModel): + used: Optional[KnowledgeStatistics] = None + """已使用量""" + total: Optional[KnowledgeStatistics] = None + """知识库总量""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py new file mode 100644 index 0000000000..c9bd60419c --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/__init__.py @@ -0,0 +1,3 @@ +from .sensitive_word_check import SensitiveWordCheckRequest + +__all__ = ["SensitiveWordCheckRequest"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py new file mode 100644 index 0000000000..0c37d99e65 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/sensitive_word_check/sensitive_word_check.py @@ -0,0 +1,14 @@ +from typing import Optional + +from typing_extensions import TypedDict + + +class SensitiveWordCheckRequest(TypedDict, total=False): + type: Optional[str] + """敏感词类型,当前仅支持ALL""" + status: Optional[str] + """敏感词启用禁用状态 + 启用:ENABLE + 禁用:DISABLE + 备注:默认开启敏感词校验,如果要关闭敏感词校验,需联系商务获取对应权限,否则敏感词禁用不生效。 + """ diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py new file mode 100644 index 0000000000..62f77344ee --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/__init__.py @@ -0,0 +1,9 @@ +from .web_search import ( + SearchIntent, + SearchRecommend, + SearchResult, + WebSearch, +) +from .web_search_chunk import WebSearchChunk + +__all__ = ["WebSearch", "SearchIntent", "SearchResult", "SearchRecommend", "WebSearchChunk"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py new file mode 100644 index 0000000000..b3a3b26f07 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/tools_web_search_params.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import Optional, Union + +from typing_extensions import TypedDict + +__all__ = ["WebSearchParams"] + + +class WebSearchParams(TypedDict): + """ + 工具名:web-search-pro参数类型定义 + + 
Attributes: + :param model: str, 模型名称 + :param request_id: Optional[str], 请求ID + :param stream: Optional[bool], 是否流式 + :param messages: Union[str, List[str], List[int], object, None], + 包含历史对话上下文的内容,按照 {"role": "user", "content": "你好"} 的json 数组形式进行传参 + 当前版本仅支持 User Message 单轮对话,工具会理解User Message并进行搜索, + 请尽可能传入不带指令格式的用户原始提问,以提高搜索准确率。 + :param scope: Optional[str], 指定搜索范围,全网、学术等,默认全网 + :param location: Optional[str], 指定搜索用户地区 location 提高相关性 + :param recent_days: Optional[int],支持指定返回 N 天(1-30)更新的搜索结果 + + + """ + + model: str + request_id: Optional[str] + stream: Optional[bool] + messages: Union[str, list[str], list[int], object, None] + scope: Optional[str] = None + location: Optional[str] = None + recent_days: Optional[int] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py new file mode 100644 index 0000000000..ac9fa3821e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search.py @@ -0,0 +1,71 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = [ + "WebSearch", + "SearchIntent", + "SearchResult", + "SearchRecommend", +] + + +class SearchIntent(BaseModel): + index: int + # 搜索轮次,默认为 0 + query: str + # 搜索优化 query + intent: str + # 判断的意图类型 + keywords: str + # 搜索关键词 + + +class SearchResult(BaseModel): + index: int + # 搜索轮次,默认为 0 + title: str + # 标题 + link: str + # 链接 + content: str + # 内容 + icon: str + # 图标 + media: str + # 来源媒体 + refer: str + # 角标序号 [ref_1] + + +class SearchRecommend(BaseModel): + index: int + # 搜索轮次,默认为 0 + query: str + # 推荐query + + +class WebSearchMessageToolCall(BaseModel): + id: str + search_intent: Optional[SearchIntent] + search_result: Optional[SearchResult] + search_recommend: Optional[SearchRecommend] + type: str + + +class WebSearchMessage(BaseModel): + role: str + tool_calls: Optional[list[WebSearchMessageToolCall]] = None + + +class WebSearchChoice(BaseModel): + index: int + finish_reason: str + message: WebSearchMessage + + +class WebSearch(BaseModel): + created: Optional[int] = None + choices: list[WebSearchChoice] + request_id: Optional[str] = None + id: Optional[str] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py new file mode 100644 index 0000000000..7fb0e02bb5 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/tools/web_search_chunk.py @@ -0,0 +1,33 @@ +from typing import Optional + +from ...core import BaseModel +from .web_search import SearchIntent, SearchRecommend, SearchResult + +__all__ = ["WebSearchChunk"] + + +class ChoiceDeltaToolCall(BaseModel): + index: int + id: Optional[str] = None + + search_intent: Optional[SearchIntent] = None + search_result: Optional[SearchResult] = None + search_recommend: Optional[SearchRecommend] = None + type: Optional[str] = None + + +class ChoiceDelta(BaseModel): + role: Optional[str] = None + tool_calls: Optional[list[ChoiceDeltaToolCall]] = None + + +class Choice(BaseModel): + delta: ChoiceDelta + finish_reason: Optional[str] = None + index: int + + +class WebSearchChunk(BaseModel): + id: Optional[str] = None + choices: list[Choice] + created: Optional[int] = None diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py 
b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py new file mode 100644 index 0000000000..b14072b1a7 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/__init__.py @@ -0,0 +1,3 @@ +from .video_object import VideoObject, VideoResult + +__all__ = ["VideoObject", "VideoResult"] diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py new file mode 100644 index 0000000000..f5489d708e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_create_params.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from typing import Optional + +from typing_extensions import TypedDict + +__all__ = ["VideoCreateParams"] + +from ..sensitive_word_check import SensitiveWordCheckRequest + + +class VideoCreateParams(TypedDict, total=False): + model: str + """模型编码""" + prompt: str + """所需视频的文本描述""" + image_url: str + """所需视频的文本描述""" + sensitive_word_check: Optional[SensitiveWordCheckRequest] + """支持 URL 或者 Base64、传入 image 奖进行图生视频 + * 图片格式: + * 图片大小:""" + request_id: str + """由用户端传参,需保证唯一性;用于区分每次请求的唯一标识,用户端不传时平台会默认生成。""" + + user_id: str + """用户端。""" diff --git a/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py new file mode 100644 index 0000000000..85c3844d8a --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/zhipuai_sdk/types/video/video_object.py @@ -0,0 +1,30 @@ +from typing import Optional + +from ...core import BaseModel + +__all__ = ["VideoObject", "VideoResult"] + + +class VideoResult(BaseModel): + url: str + """视频url""" + cover_image_url: str + """预览图""" + + +class VideoObject(BaseModel): + id: Optional[str] = None + """智谱 AI 开放平台生成的任务订单号,调用请求结果接口时请使用此订单号""" + + model: str + """模型名称""" + + video_result: list[VideoResult] + """视频生成结果""" + + task_status: str + """处理状态,PROCESSING(处理中),SUCCESS(成功),FAIL(失败) + 注:处理中状态需通过查询获取结果""" + + request_id: str + """用户在客户端请求时提交的任务编号或者平台生成的任务编号""" diff --git a/api/core/tools/provider/builtin/cogview/tools/cogview3.py b/api/core/tools/provider/builtin/cogview/tools/cogview3.py index 9039708588..085084ca38 100644 --- a/api/core/tools/provider/builtin/cogview/tools/cogview3.py +++ b/api/core/tools/provider/builtin/cogview/tools/cogview3.py @@ -21,15 +21,22 @@ class CogView3Tool(BuiltinTool): ) size_mapping = { "square": "1024x1024", - "vertical": "1024x1792", - "horizontal": "1792x1024", + "vertical_768": "768x1344", + "vertical_864": "864x1152", + "horizontal_1344": "1344x768", + "horizontal_1152": "1152x864", + "widescreen_1440": "1440x720", + "tallscreen_720": "720x1440", } # prompt prompt = tool_parameters.get("prompt", "") if not prompt: return self.create_text_message("Please input prompt") - # get size - size = size_mapping[tool_parameters.get("size", "square")] + # get size key + size_key = tool_parameters.get("size", "square") + # cogview-3-plus get size + if size_key != "cogview_3": + size = size_mapping[size_key] # get n n = tool_parameters.get("n", 1) # get quality @@ -43,16 +50,29 @@ class CogView3Tool(BuiltinTool): # set extra body seed_id = tool_parameters.get("seed_id", self._generate_random_id(8)) extra_body = {"seed": seed_id} - response = client.images.generations( - prompt=prompt, - model="cogview-3", - size=size, - n=n, - extra_body=extra_body, - 
style=style, - quality=quality, - response_format="b64_json", - ) + # cogview-3-plus + if size_key != "cogview_3": + response = client.images.generations( + prompt=prompt, + model="cogview-3-plus", + size=size, + n=n, + extra_body=extra_body, + style=style, + quality=quality, + response_format="b64_json", + ) + # cogview-3 + else: + response = client.images.generations( + prompt=prompt, + model="cogview-3", + n=n, + extra_body=extra_body, + style=style, + quality=quality, + response_format="b64_json", + ) result = [] for image in response.data: result.append(self.create_image_message(image=image.url)) diff --git a/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml b/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml index 1de3f599b6..9ab5c2729b 100644 --- a/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml +++ b/api/core/tools/provider/builtin/cogview/tools/cogview3.yaml @@ -42,21 +42,46 @@ parameters: pt_BR: Image size form: form options: + - value: cogview_3 + label: + en_US: Square_cogview_3(1024x1024) + zh_Hans: 方_cogview_3(1024x1024) + pt_BR: Square_cogview_3(1024x1024) - value: square label: - en_US: Squre(1024x1024) + en_US: Square(1024x1024) zh_Hans: 方(1024x1024) - pt_BR: Squre(1024x1024) - - value: vertical + pt_BR: Square(1024x1024) + - value: vertical_768 label: - en_US: Vertical(1024x1792) - zh_Hans: 竖屏(1024x1792) - pt_BR: Vertical(1024x1792) - - value: horizontal + en_US: Vertical(768x1344) + zh_Hans: 竖屏(768x1344) + pt_BR: Vertical(768x1344) + - value: vertical_864 label: - en_US: Horizontal(1792x1024) - zh_Hans: 横屏(1792x1024) - pt_BR: Horizontal(1792x1024) + en_US: Vertical(864x1152) + zh_Hans: 竖屏(864x1152) + pt_BR: Vertical(864x1152) + - value: horizontal_1344 + label: + en_US: Horizontal(1344x768) + zh_Hans: 横屏(1344x768) + pt_BR: Horizontal(1344x768) + - value: horizontal_1152 + label: + en_US: Horizontal(1152x864) + zh_Hans: 横屏(1152x864) + pt_BR: Horizontal(1152x864) + - value: widescreen_1440 + label: + en_US: Widescreen(1440x720) + zh_Hans: 宽屏(1440x720) + pt_BR: Widescreen(1440x720) + - value: tallscreen_720 + label: + en_US: Tallscreen(720x1440) + zh_Hans: 高屏(720x1440) + pt_BR: Tallscreen(720x1440) default: square - name: n type: number From c9f1e18df1e400ce5c6599d7a528ce4d50e1bdea Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Sun, 22 Sep 2024 10:14:33 +0800 Subject: [PATCH 29/40] Add model parameter translation (#8509) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: swingchen01 Co-authored-by: 陈长君 --- .../model_providers/ollama/llm/llm.py | 83 ++++++++++++------- .../openai_api_compatible/llm/llm.py | 35 ++++++-- 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py index 1ed77a2ee8..ff732e6925 100644 --- a/api/core/model_runtime/model_providers/ollama/llm/llm.py +++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py @@ -472,12 +472,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ParameterRule( name=DefaultParameterName.TEMPERATURE.value, use_template=DefaultParameterName.TEMPERATURE.value, - label=I18nObject(en_US="Temperature"), + label=I18nObject(en_US="Temperature", zh_Hans="温度"), type=ParameterType.FLOAT, help=I18nObject( en_US="The temperature of the model. " "Increasing the temperature will make the model answer " - "more creatively. (Default: 0.8)" + "more creatively. 
(Default: 0.8)", + zh_Hans="模型的温度。增加温度将使模型的回答更具创造性。(默认值:0.8)", ), default=0.1, min=0, @@ -486,12 +487,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ParameterRule( name=DefaultParameterName.TOP_P.value, use_template=DefaultParameterName.TOP_P.value, - label=I18nObject(en_US="Top P"), + label=I18nObject(en_US="Top P", zh_Hans="Top P"), type=ParameterType.FLOAT, help=I18nObject( en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to " "more diverse text, while a lower value (e.g., 0.5) will generate more " - "focused and conservative text. (Default: 0.9)" + "focused and conservative text. (Default: 0.9)", + zh_Hans="与top-k一起工作。较高的值(例如,0.95)会导致生成更多样化的文本,而较低的值(例如,0.5)会生成更专注和保守的文本。(默认值:0.9)", ), default=0.9, min=0, @@ -499,12 +501,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ), ParameterRule( name="top_k", - label=I18nObject(en_US="Top K"), + label=I18nObject(en_US="Top K", zh_Hans="Top K"), type=ParameterType.INT, help=I18nObject( en_US="Reduces the probability of generating nonsense. " "A higher value (e.g. 100) will give more diverse answers, " - "while a lower value (e.g. 10) will be more conservative. (Default: 40)" + "while a lower value (e.g. 10) will be more conservative. (Default: 40)", + zh_Hans="减少生成无意义内容的可能性。较高的值(例如100)将提供更多样化的答案,而较低的值(例如10)将更为保守。(默认值:40)", ), min=1, max=100, @@ -516,7 +519,8 @@ class OllamaLargeLanguageModel(LargeLanguageModel): help=I18nObject( en_US="Sets how strongly to penalize repetitions. " "A higher value (e.g., 1.5) will penalize repetitions more strongly, " - "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)" + "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)", + zh_Hans="设置对重复内容的惩罚强度。一个较高的值(例如,1.5)会更强地惩罚重复内容,而一个较低的值(例如,0.9)则会相对宽容。(默认值:1.1)", ), min=-2, max=2, @@ -524,11 +528,12 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ParameterRule( name="num_predict", use_template="max_tokens", - label=I18nObject(en_US="Num Predict"), + label=I18nObject(en_US="Num Predict", zh_Hans="最大令牌数预测"), type=ParameterType.INT, help=I18nObject( en_US="Maximum number of tokens to predict when generating text. " - "(Default: 128, -1 = infinite generation, -2 = fill context)" + "(Default: 128, -1 = infinite generation, -2 = fill context)", + zh_Hans="生成文本时预测的最大令牌数。(默认值:128,-1 = 无限生成,-2 = 填充上下文)", ), default=(512 if int(credentials.get("max_tokens", 4096)) >= 768 else 128), min=-2, @@ -536,121 +541,137 @@ class OllamaLargeLanguageModel(LargeLanguageModel): ), ParameterRule( name="mirostat", - label=I18nObject(en_US="Mirostat sampling"), + label=I18nObject(en_US="Mirostat sampling", zh_Hans="Mirostat 采样"), type=ParameterType.INT, help=I18nObject( en_US="Enable Mirostat sampling for controlling perplexity. " - "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)" + "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", + zh_Hans="启用 Mirostat 采样以控制困惑度。" + "(默认值:0,0 = 禁用,1 = Mirostat,2 = Mirostat 2.0)", ), min=0, max=2, ), ParameterRule( name="mirostat_eta", - label=I18nObject(en_US="Mirostat Eta"), + label=I18nObject(en_US="Mirostat Eta", zh_Hans="学习率"), type=ParameterType.FLOAT, help=I18nObject( en_US="Influences how quickly the algorithm responds to feedback from " "the generated text. A lower learning rate will result in slower adjustments, " "while a higher learning rate will make the algorithm more responsive. 
" - "(Default: 0.1)" + "(Default: 0.1)", + zh_Hans="影响算法对生成文本反馈响应的速度。较低的学习率会导致调整速度变慢,而较高的学习率会使得算法更加灵敏。(默认值:0.1)", ), precision=1, ), ParameterRule( name="mirostat_tau", - label=I18nObject(en_US="Mirostat Tau"), + label=I18nObject(en_US="Mirostat Tau", zh_Hans="文本连贯度"), type=ParameterType.FLOAT, help=I18nObject( en_US="Controls the balance between coherence and diversity of the output. " - "A lower value will result in more focused and coherent text. (Default: 5.0)" + "A lower value will result in more focused and coherent text. (Default: 5.0)", + zh_Hans="控制输出的连贯性和多样性之间的平衡。较低的值会导致更专注和连贯的文本。(默认值:5.0)", ), precision=1, ), ParameterRule( name="num_ctx", - label=I18nObject(en_US="Size of context window"), + label=I18nObject(en_US="Size of context window", zh_Hans="上下文窗口大小"), type=ParameterType.INT, help=I18nObject( - en_US="Sets the size of the context window used to generate the next token. (Default: 2048)" + en_US="Sets the size of the context window used to generate the next token. (Default: 2048)", + zh_Hans="设置用于生成下一个标记的上下文窗口大小。(默认值:2048)", ), default=2048, min=1, ), ParameterRule( name="num_gpu", - label=I18nObject(en_US="GPU Layers"), + label=I18nObject(en_US="GPU Layers", zh_Hans="GPU 层数"), type=ParameterType.INT, help=I18nObject( en_US="The number of layers to offload to the GPU(s). " "On macOS it defaults to 1 to enable metal support, 0 to disable." "As long as a model fits into one gpu it stays in one. " - "It does not set the number of GPU(s). " + "It does not set the number of GPU(s). ", + zh_Hans="加载到 GPU 的层数。在 macOS 上,默认为 1 以启用 Metal 支持,设置为 0 则禁用。" + "只要模型适合一个 GPU,它就保留在其中。它不设置 GPU 的数量。", ), min=-1, default=1, ), ParameterRule( name="num_thread", - label=I18nObject(en_US="Num Thread"), + label=I18nObject(en_US="Num Thread", zh_Hans="线程数"), type=ParameterType.INT, help=I18nObject( en_US="Sets the number of threads to use during computation. " "By default, Ollama will detect this for optimal performance. " "It is recommended to set this value to the number of physical CPU cores " - "your system has (as opposed to the logical number of cores)." + "your system has (as opposed to the logical number of cores).", + zh_Hans="设置计算过程中使用的线程数。默认情况下,Ollama会检测以获得最佳性能。建议将此值设置为系统拥有的物理CPU核心数(而不是逻辑核心数)。", ), min=1, ), ParameterRule( name="repeat_last_n", - label=I18nObject(en_US="Repeat last N"), + label=I18nObject(en_US="Repeat last N", zh_Hans="回溯内容"), type=ParameterType.INT, help=I18nObject( en_US="Sets how far back for the model to look back to prevent repetition. " - "(Default: 64, 0 = disabled, -1 = num_ctx)" + "(Default: 64, 0 = disabled, -1 = num_ctx)", + zh_Hans="设置模型回溯多远的内容以防止重复。(默认值:64,0 = 禁用,-1 = num_ctx)", ), min=-1, ), ParameterRule( name="tfs_z", - label=I18nObject(en_US="TFS Z"), + label=I18nObject(en_US="TFS Z", zh_Hans="减少标记影响"), type=ParameterType.FLOAT, help=I18nObject( en_US="Tail free sampling is used to reduce the impact of less probable tokens " "from the output. A higher value (e.g., 2.0) will reduce the impact more, " - "while a value of 1.0 disables this setting. (default: 1)" + "while a value of 1.0 disables this setting. (default: 1)", + zh_Hans="用于减少输出中不太可能的标记的影响。较高的值(例如,2.0)会更多地减少这种影响,而1.0的值则会禁用此设置。(默认值:1)", ), precision=1, ), ParameterRule( name="seed", - label=I18nObject(en_US="Seed"), + label=I18nObject(en_US="Seed", zh_Hans="随机数种子"), type=ParameterType.INT, help=I18nObject( en_US="Sets the random number seed to use for generation. Setting this to " "a specific number will make the model generate the same text for " - "the same prompt. 
(Default: 0)" + "the same prompt. (Default: 0)", + zh_Hans="设置用于生成的随机数种子。将此设置为特定数字将使模型对相同的提示生成相同的文本。(默认值:0)", ), ), ParameterRule( name="keep_alive", - label=I18nObject(en_US="Keep Alive"), + label=I18nObject(en_US="Keep Alive", zh_Hans="模型存活时间"), type=ParameterType.STRING, help=I18nObject( en_US="Sets how long the model is kept in memory after generating a response. " "This must be a duration string with a unit (e.g., '10m' for 10 minutes or '24h' for 24 hours)." " A negative number keeps the model loaded indefinitely, and '0' unloads the model" " immediately after generating a response." - " Valid time units are 's','m','h'. (Default: 5m)" + " Valid time units are 's','m','h'. (Default: 5m)", + zh_Hans="设置模型在生成响应后在内存中保留的时间。" + "这必须是一个带有单位的持续时间字符串(例如,'10m' 表示10分钟,'24h' 表示24小时)。" + "负数表示无限期地保留模型,'0'表示在生成响应后立即卸载模型。" + "有效的时间单位有 's'(秒)、'm'(分钟)、'h'(小时)。(默认值:5m)", ), ), ParameterRule( name="format", - label=I18nObject(en_US="Format"), + label=I18nObject(en_US="Format", zh_Hans="返回格式"), type=ParameterType.STRING, help=I18nObject( - en_US="the format to return a response in. Currently the only accepted value is json." + en_US="the format to return a response in. Currently the only accepted value is json.", + zh_Hans="返回响应的格式。目前唯一接受的值是json。", ), options=["json"], ), diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py index 5a8a754f72..c2ffe653c8 100644 --- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py +++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py @@ -205,7 +205,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): parameter_rules=[ ParameterRule( name=DefaultParameterName.TEMPERATURE.value, - label=I18nObject(en_US="Temperature"), + label=I18nObject(en_US="Temperature", zh_Hans="温度"), + help=I18nObject( + en_US="Kernel sampling threshold. Used to determine the randomness of the results." + "The higher the value, the stronger the randomness." + "The higher the possibility of getting different answers to the same question.", + zh_Hans="核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。", + ), type=ParameterType.FLOAT, default=float(credentials.get("temperature", 0.7)), min=0, @@ -214,7 +220,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.TOP_P.value, - label=I18nObject(en_US="Top P"), + label=I18nObject(en_US="Top P", zh_Hans="Top P"), + help=I18nObject( + en_US="The probability threshold of the nucleus sampling method during the generation process." + "The larger the value is, the higher the randomness of generation will be." + "The smaller the value is, the higher the certainty of generation will be.", + zh_Hans="生成过程中核采样方法概率阈值。取值越大,生成的随机性越高;取值越小,生成的确定性越高。", + ), type=ParameterType.FLOAT, default=float(credentials.get("top_p", 1)), min=0, @@ -223,7 +235,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.FREQUENCY_PENALTY.value, - label=I18nObject(en_US="Frequency Penalty"), + label=I18nObject(en_US="Frequency Penalty", zh_Hans="频率惩罚"), + help=I18nObject( + en_US="For controlling the repetition rate of words used by the model." 
+ "Increasing this can reduce the repetition of the same words in the model's output.", + zh_Hans="用于控制模型已使用字词的重复率。 提高此项可以降低模型在输出中重复相同字词的重复度。", + ), type=ParameterType.FLOAT, default=float(credentials.get("frequency_penalty", 0)), min=-2, @@ -231,7 +248,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.PRESENCE_PENALTY.value, - label=I18nObject(en_US="Presence Penalty"), + label=I18nObject(en_US="Presence Penalty", zh_Hans="存在惩罚"), + help=I18nObject( + en_US="Used to control the repetition rate when generating models." + "Increasing this can reduce the repetition rate of model generation.", + zh_Hans="用于控制模型生成时的重复度。提高此项可以降低模型生成的重复度。", + ), type=ParameterType.FLOAT, default=float(credentials.get("presence_penalty", 0)), min=-2, @@ -239,7 +261,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel): ), ParameterRule( name=DefaultParameterName.MAX_TOKENS.value, - label=I18nObject(en_US="Max Tokens"), + label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"), + help=I18nObject( + en_US="Maximum length of tokens for the model response.", zh_Hans="模型回答的tokens的最大长度。" + ), type=ParameterType.INT, default=512, min=1, From 6c2fa8defc8a2c6a66be45ade7b13e2388fb4b69 Mon Sep 17 00:00:00 2001 From: HJY <1398145450@qq.com> Date: Sun, 22 Sep 2024 10:14:43 +0800 Subject: [PATCH 30/40] fix: form input add tabIndex (#8478) --- web/app/activate/activateForm.tsx | 2 ++ web/app/signin/normalForm.tsx | 2 ++ 2 files changed, 4 insertions(+) diff --git a/web/app/activate/activateForm.tsx b/web/app/activate/activateForm.tsx index 3b1eed6f09..8e9691b354 100644 --- a/web/app/activate/activateForm.tsx +++ b/web/app/activate/activateForm.tsx @@ -143,6 +143,7 @@ const ActivateForm = () => { onChange={e => setName(e.target.value)} placeholder={t('login.namePlaceholder') || ''} className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'} + tabIndex={1} />
@@ -159,6 +160,7 @@ const ActivateForm = () => { onChange={e => setPassword(e.target.value)} placeholder={t('login.passwordPlaceholder') || ''} className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'} + tabIndex={2} />
{t('login.error.passwordInvalid')}
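The tabIndex attributes added above give the activation form an explicit keyboard focus order (name field first, then password). A minimal sketch of the same pattern, using made-up component and field names rather than anything from this patch:

import React, { useState } from 'react'

// Sequential tabIndex values make keyboard focus move name -> password -> submit,
// independent of where the inputs sit in the surrounding layout.
const ExampleForm = () => {
  const [name, setName] = useState('')
  const [password, setPassword] = useState('')

  return (
    <form onSubmit={e => e.preventDefault()}>
      <input value={name} onChange={e => setName(e.target.value)} tabIndex={1} />
      <input type="password" value={password} onChange={e => setPassword(e.target.value)} tabIndex={2} />
      <button type="submit" tabIndex={3}>Submit</button>
    </form>
  )
}

export default ExampleForm

The sign-in form diff below applies the same two-step ordering to its email and password inputs.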
diff --git a/web/app/signin/normalForm.tsx b/web/app/signin/normalForm.tsx index 7f23c7d22e..816df8007d 100644 --- a/web/app/signin/normalForm.tsx +++ b/web/app/signin/normalForm.tsx @@ -217,6 +217,7 @@ const NormalForm = () => { autoComplete="email" placeholder={t('login.emailPlaceholder') || ''} className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm'} + tabIndex={1} /> @@ -241,6 +242,7 @@ const NormalForm = () => { autoComplete="current-password" placeholder={t('login.passwordPlaceholder') || ''} className={'appearance-none block w-full rounded-lg pl-[14px] px-3 py-2 border border-gray-200 hover:border-gray-300 hover:shadow-sm focus:outline-none focus:ring-primary-500 focus:border-primary-500 placeholder-gray-400 caret-primary-600 sm:text-sm pr-10'} + tabIndex={2} />