From 4fe5297e35e630c403f3f5c69c98bdf1562915f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?= Date: Fri, 8 Nov 2024 17:04:05 +0800 Subject: [PATCH] feat: add cogVideo tool (#10456) --- .../builtin/cogview/tools/cogvideo.py | 24 ++++++++++++++ .../builtin/cogview/tools/cogvideo.yaml | 32 +++++++++++++++++++ .../builtin/cogview/tools/cogvideo_job.py | 30 +++++++++++++++++ .../builtin/cogview/tools/cogvideo_job.yaml | 21 ++++++++++++ 4 files changed, 107 insertions(+) create mode 100644 api/core/tools/provider/builtin/cogview/tools/cogvideo.py create mode 100644 api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml create mode 100644 api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py create mode 100644 api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml diff --git a/api/core/tools/provider/builtin/cogview/tools/cogvideo.py b/api/core/tools/provider/builtin/cogview/tools/cogvideo.py new file mode 100644 index 0000000000..7f69e833cb --- /dev/null +++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo.py @@ -0,0 +1,24 @@ +from typing import Any, Union + +from zhipuai import ZhipuAI + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + + +class CogVideoTool(BuiltinTool): + def _invoke( + self, user_id: str, tool_parameters: dict[str, Any] + ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + client = ZhipuAI( + base_url=self.runtime.credentials["zhipuai_base_url"], + api_key=self.runtime.credentials["zhipuai_api_key"], + ) + if not tool_parameters.get("prompt") and not tool_parameters.get("image_url"): + return self.create_text_message("require at least one of prompt and image_url") + + response = client.videos.generations( + model="cogvideox", prompt=tool_parameters.get("prompt"), image_url=tool_parameters.get("image_url") + ) + + return self.create_json_message(response.dict()) diff --git a/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml b/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml new file mode 100644 index 0000000000..3df0cfcea9 --- /dev/null +++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml @@ -0,0 +1,32 @@ +identity: + name: cogvideo + author: hjlarry + label: + en_US: CogVideo + zh_Hans: CogVideo 视频生成 +description: + human: + en_US: Use the CogVideox model provided by ZhipuAI to generate videos based on user prompts and images. + zh_Hans: 使用智谱cogvideox模型,根据用户输入的提示词和图片,生成视频。 + llm: A tool for generating videos. The input is user's prompt or image url or both of them, the output is a task id. You can use another tool with this task id to check the status and get the video. +parameters: + - name: prompt + type: string + label: + en_US: prompt + zh_Hans: 提示词 + human_description: + en_US: The prompt text used to generate video. + zh_Hans: 用于生成视频的提示词。 + llm_description: The prompt text used to generate video. Optional. + form: llm + - name: image_url + type: string + label: + en_US: image url + zh_Hans: 图片链接 + human_description: + en_US: The image url used to generate video. + zh_Hans: 输入一个图片链接,生成的视频将基于该图片和提示词。 + llm_description: The image url used to generate video. Optional. + form: llm diff --git a/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py new file mode 100644 index 0000000000..a521f1c28a --- /dev/null +++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py @@ -0,0 +1,30 @@ +from typing import Any, Union + +import httpx +from zhipuai import ZhipuAI + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + + +class CogVideoJobTool(BuiltinTool): + def _invoke( + self, user_id: str, tool_parameters: dict[str, Any] + ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + client = ZhipuAI( + api_key=self.runtime.credentials["zhipuai_api_key"], + base_url=self.runtime.credentials["zhipuai_base_url"], + ) + + response = client.videos.retrieve_videos_result(id=tool_parameters.get("id")) + result = [self.create_json_message(response.dict())] + if response.task_status == "SUCCESS": + for item in response.video_result: + video_cover_image = self.create_image_message(item.cover_image_url) + result.append(video_cover_image) + video = self.create_blob_message( + blob=httpx.get(item.url).content, meta={"mime_type": "video/mp4"}, save_as=self.VariableKey.VIDEO + ) + result.append(video) + + return result diff --git a/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml new file mode 100644 index 0000000000..fb2eb3ab13 --- /dev/null +++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml @@ -0,0 +1,21 @@ +identity: + name: cogvideo_job + author: hjlarry + label: + en_US: CogVideo Result + zh_Hans: CogVideo 结果获取 +description: + human: + en_US: Get the result of CogVideo tool generation. + zh_Hans: 根据 CogVideo 工具返回的 id 获取视频生成结果。 + llm: Get the result of CogVideo tool generation. The input is the id which is returned by the CogVideo tool. The output is the url of video and video cover image. +parameters: + - name: id + type: string + label: + en_US: id + human_description: + en_US: The id returned by the CogVideo. + zh_Hans: CogVideo 工具返回的 id。 + llm_description: The id returned by the cogvideo. + form: llm