From 0455e4e1a58205740cd1a3afcfa5efb87daa83f6 Mon Sep 17 00:00:00 2001 From: Joel Date: Wed, 7 Aug 2024 11:29:20 +0800 Subject: [PATCH] feat: llm support vision --- .../nodes/_base/components/config-vision.tsx | 57 +++++++++++++++---- .../variable/var-reference-picker.tsx | 4 +- .../components/workflow/nodes/llm/panel.tsx | 40 +++++-------- .../components/workflow/nodes/llm/types.ts | 7 +-- .../workflow/nodes/llm/use-config.ts | 28 +++------ web/app/components/workflow/types.ts | 2 +- web/i18n/en-US/app-debug.ts | 1 + web/i18n/zh-Hans/app-debug.ts | 1 + 8 files changed, 74 insertions(+), 66 deletions(-) diff --git a/web/app/components/workflow/nodes/_base/components/config-vision.tsx b/web/app/components/workflow/nodes/_base/components/config-vision.tsx index 4a8d299986..8a75b78466 100644 --- a/web/app/components/workflow/nodes/_base/components/config-vision.tsx +++ b/web/app/components/workflow/nodes/_base/components/config-vision.tsx @@ -3,31 +3,52 @@ import type { FC } from 'react' import React, { useCallback } from 'react' import { useTranslation } from 'react-i18next' import produce from 'immer' +import VarReferencePicker from './variable/var-reference-picker' import ResolutionPicker from '@/app/components/workflow/nodes/llm/components/resolution-picker' import Field from '@/app/components/workflow/nodes/_base/components/field' import Switch from '@/app/components/base/switch' -import type { VisionSetting } from '@/app/components/workflow/types' -import type { Resolution } from '@/types/app' +import { type ValueSelector, type Var, VarType, type VisionSetting } from '@/app/components/workflow/types' +import { Resolution } from '@/types/app' +import TooltipPlus from '@/app/components/base/tooltip-plus' const i18nPrefix = 'workflow.nodes.llm' type Props = { + isVisionModel: boolean + readOnly: boolean enabled: boolean onEnabledChange: (enabled: boolean) => void - config: VisionSetting + nodeId: string + config?: VisionSetting onConfigChange: (config: VisionSetting) => void } const ConfigVision: FC = ({ + isVisionModel, + readOnly, enabled, onEnabledChange, - config, + nodeId, + config = { + detail: Resolution.high, + valueSelector: [], + }, onConfigChange, }) => { const { t } = useTranslation() + const filterVar = useCallback((payload: Var) => { + return [VarType.file, VarType.arrayFile].includes(payload.type) + }, []) const handleVisionResolutionChange = useCallback((resolution: Resolution) => { const newConfig = produce(config, (draft) => { - draft.resolution = resolution + draft.detail = resolution + }) + onConfigChange(newConfig) + }, [config, onConfigChange]) + + const handleVarSelectorChange = useCallback((valueSelector: ValueSelector | string) => { + const newConfig = produce(config, (draft) => { + draft.valueSelector = valueSelector as ValueSelector }) onConfigChange(newConfig) }, [config, onConfigChange]) @@ -37,15 +58,29 @@ const ConfigVision: FC = ({ title={t(`${i18nPrefix}.vision`)} tooltip={t('appDebug.vision.description')!} operations={ - + // disabled={isVisionModel} + + + } > - {enabled + {(enabled || !isVisionModel) ? ( - +
+ + +
+ ) : null} diff --git a/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx b/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx index 9a41e60f39..89ea6d5228 100644 --- a/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx +++ b/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx @@ -37,7 +37,7 @@ const TRIGGER_DEFAULT_WIDTH = 227 type Props = { className?: string nodeId: string - isShowNodeName: boolean + isShowNodeName?: boolean readonly: boolean value: ValueSelector | string onChange: (value: ValueSelector | string, varKindType: VarKindType, varInfo?: Var) => void @@ -56,7 +56,7 @@ const VarReferencePicker: FC = ({ nodeId, readonly, className, - isShowNodeName, + isShowNodeName = true, value, onOpen = () => { }, onChange, diff --git a/web/app/components/workflow/nodes/llm/panel.tsx b/web/app/components/workflow/nodes/llm/panel.tsx index 1c2ec3c985..d3d3601f03 100644 --- a/web/app/components/workflow/nodes/llm/panel.tsx +++ b/web/app/components/workflow/nodes/llm/panel.tsx @@ -4,8 +4,8 @@ import { useTranslation } from 'react-i18next' import { RiQuestionLine } from '@remixicon/react' import MemoryConfig from '../_base/components/memory-config' import VarReferencePicker from '../_base/components/variable/var-reference-picker' +import ConfigVision from '../_base/components/config-vision' import useConfig from './use-config' -import ResolutionPicker from './components/resolution-picker' import type { LLMNodeType } from './types' import ConfigPrompt from './components/config-prompt' import VarList from '@/app/components/workflow/nodes/_base/components/variable/var-list' @@ -14,14 +14,13 @@ import Field from '@/app/components/workflow/nodes/_base/components/field' import Split from '@/app/components/workflow/nodes/_base/components/split' import ModelParameterModal from '@/app/components/header/account-setting/model-provider-page/model-parameter-modal' import OutputVars, { VarItem } from '@/app/components/workflow/nodes/_base/components/output-vars' -import { Resolution } from '@/types/app' import { InputVarType, type NodePanelProps } from '@/app/components/workflow/types' import BeforeRunForm from '@/app/components/workflow/nodes/_base/components/before-run-form' import type { Props as FormProps } from '@/app/components/workflow/nodes/_base/components/before-run-form/form' import ResultPanel from '@/app/components/workflow/run/result-panel' import TooltipPlus from '@/app/components/base/tooltip-plus' import Editor from '@/app/components/workflow/nodes/_base/components/prompt/editor' -import Switch from '@/app/components/base/switch' + const i18nPrefix = 'workflow.nodes.llm' const Panel: FC> = ({ @@ -37,7 +36,7 @@ const Panel: FC> = ({ isChatMode, isCompletionModel, shouldShowContextTip, - isShowVisionConfig, + isVisionModel, handleModelChanged, hasSetBlockStatus, handleCompletionParamsChange, @@ -103,7 +102,7 @@ const Panel: FC> = ({ ) } - if (isShowVisionConfig) { + if (isVisionModel) { forms.push( { label: t(`${i18nPrefix}.vision`)!, @@ -259,28 +258,15 @@ const Panel: FC> = ({ )} {/* Vision: GPT4-vision and so on */} - {isShowVisionConfig && ( - <> - - - } - > - {inputs.vision.enabled - ? ( - - ) - : null} - - - - )} +
diff --git a/web/app/components/workflow/nodes/llm/types.ts b/web/app/components/workflow/nodes/llm/types.ts index 0ada4d3728..a7774fca2e 100644 --- a/web/app/components/workflow/nodes/llm/types.ts +++ b/web/app/components/workflow/nodes/llm/types.ts @@ -1,5 +1,4 @@ -import type { Resolution } from '@/types/app' -import type { CommonNodeType, Memory, ModelConfig, PromptItem, ValueSelector, Variable } from '@/app/components/workflow/types' +import type { CommonNodeType, Memory, ModelConfig, PromptItem, ValueSelector, Variable, VisionSetting } from '@/app/components/workflow/types' export type LLMNodeType = CommonNodeType & { model: ModelConfig @@ -14,8 +13,6 @@ export type LLMNodeType = CommonNodeType & { } vision: { enabled: boolean - configs?: { - detail: Resolution - } + configs?: VisionSetting } } diff --git a/web/app/components/workflow/nodes/llm/use-config.ts b/web/app/components/workflow/nodes/llm/use-config.ts index 38051f5944..e4c973a99c 100644 --- a/web/app/components/workflow/nodes/llm/use-config.ts +++ b/web/app/components/workflow/nodes/llm/use-config.ts @@ -1,7 +1,7 @@ import { useCallback, useEffect, useRef, useState } from 'react' import produce from 'immer' import { EditionType, VarType } from '../../types' -import type { Memory, PromptItem, ValueSelector, Var, Variable } from '../../types' +import type { Memory, PromptItem, ValueSelector, Var, Variable, VisionSetting } from '../../types' import { useStore } from '../../store' import { useIsChatMode, @@ -147,13 +147,13 @@ const useConfig = (id: string, payload: LLMNodeType) => { model: model.name, }, ) - const isShowVisionConfig = !!currModel?.features?.includes(ModelFeatureEnum.vision) + const isVisionModel = !!currModel?.features?.includes(ModelFeatureEnum.vision) // change to vision model to set vision enabled, else disabled useEffect(() => { if (!modelChanged) return setModelChanged(false) - if (!isShowVisionConfig) { + if (!isVisionModel) { const newInputs = produce(inputs, (draft) => { draft.vision = { enabled: false, @@ -169,6 +169,7 @@ const useConfig = (id: string, payload: LLMNodeType) => { enabled: true, configs: { detail: Resolution.high, + valueSelector: [], }, } } @@ -176,7 +177,7 @@ const useConfig = (id: string, payload: LLMNodeType) => { setInputs(newInputs) } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [isShowVisionConfig, modelChanged]) + }, [isVisionModel, modelChanged]) // variables const isShowVars = (() => { @@ -298,31 +299,18 @@ const useConfig = (id: string, payload: LLMNodeType) => { if (!draft.vision) { draft.vision = { enabled, - configs: { - detail: Resolution.high, - }, } } else { draft.vision.enabled = enabled - if (!draft.vision.configs) { - draft.vision.configs = { - detail: Resolution.high, - } - } } }) setInputs(newInputs) }, [inputs, setInputs]) - const handleVisionResolutionChange = useCallback((newResolution: Resolution) => { + const handleVisionResolutionChange = useCallback((config: VisionSetting) => { const newInputs = produce(inputs, (draft) => { - if (!draft.vision.configs) { - draft.vision.configs = { - detail: Resolution.high, - } - } - draft.vision.configs.detail = newResolution + draft.vision.configs = config }) setInputs(newInputs) }, [inputs, setInputs]) @@ -425,7 +413,7 @@ const useConfig = (id: string, payload: LLMNodeType) => { isCompletionModel, hasSetBlockStatus, shouldShowContextTip, - isShowVisionConfig, + isVisionModel, handleModelChanged, handleCompletionParamsChange, isShowVars, diff --git a/web/app/components/workflow/types.ts b/web/app/components/workflow/types.ts index 69cb7d8fe4..b34295e6f9 100644 --- a/web/app/components/workflow/types.ts +++ b/web/app/components/workflow/types.ts @@ -347,5 +347,5 @@ export type UploadFileSetting = { export type VisionSetting = { valueSelector: ValueSelector - resolution: Resolution + detail: Resolution } diff --git a/web/i18n/en-US/app-debug.ts b/web/i18n/en-US/app-debug.ts index 7e92acf48c..4d88dc5168 100644 --- a/web/i18n/en-US/app-debug.ts +++ b/web/i18n/en-US/app-debug.ts @@ -356,6 +356,7 @@ const translation = { vision: { name: 'Vision', description: 'Enable Vision will allows the model to take in images and answer questions about them. ', + onlySupportVisionModelTip: 'Only supports vision models', settings: 'Settings', visionSettings: { title: 'Vision Settings', diff --git a/web/i18n/zh-Hans/app-debug.ts b/web/i18n/zh-Hans/app-debug.ts index 52542ef830..2e62a51114 100644 --- a/web/i18n/zh-Hans/app-debug.ts +++ b/web/i18n/zh-Hans/app-debug.ts @@ -351,6 +351,7 @@ const translation = { vision: { name: '视觉', description: '开启视觉功能将允许模型输入图片,并根据图像内容的理解回答用户问题', + onlySupportVisionModelTip: '只有视觉模型配置视觉功能', settings: '设置', visionSettings: { title: '视觉设置',