feat: llm support vision

2025-08-19 19:59:10 +08:00 · 2024-08-07 11:29:20 +08:00 · 2024-08-07 11:29:20 +08:00 · 0455e4e1a5
commit 0455e4e1a5
parent 251ab5418f
8 changed files with 74 additions and 66 deletions
--- a/web/app/components/workflow/nodes/_base/components/config-vision.tsx
+++ b/web/app/components/workflow/nodes/_base/components/config-vision.tsx
@ -3,31 +3,52 @@ import type { FC } from 'react'
 import React, { useCallback } from 'react'
 import { useTranslation } from 'react-i18next'
 import produce from 'immer'
+import VarReferencePicker from './variable/var-reference-picker'
 import ResolutionPicker from '@/app/components/workflow/nodes/llm/components/resolution-picker'
 import Field from '@/app/components/workflow/nodes/_base/components/field'
 import Switch from '@/app/components/base/switch'
-import type { VisionSetting } from '@/app/components/workflow/types'
-import type { Resolution } from '@/types/app'
+import { type ValueSelector, type Var, VarType, type VisionSetting } from '@/app/components/workflow/types'
+import { Resolution } from '@/types/app'
+import TooltipPlus from '@/app/components/base/tooltip-plus'
 const i18nPrefix = 'workflow.nodes.llm'

 type Props = {
+  // Whether the selected model supports vision; when false the switch is disabled and shown as off.
+  isVisionModel: boolean
+  // Disables the switch and puts the variable picker into read-only mode.
+  readOnly: boolean
  // Current on/off state of vision, used as the switch's default value.
  enabled: boolean
  // Invoked by the switch when the user toggles vision.
  onEnabledChange: (enabled: boolean) => void
-  config: VisionSetting
+  // Id of the owning workflow node; forwarded to the variable picker.
+  nodeId: string
+  // Optional vision settings; the component falls back to { detail: Resolution.high, valueSelector: [] }.
+  config?: VisionSetting
  // Receives the full updated settings whenever the variable or the resolution changes.
  onConfigChange: (config: VisionSetting) => void
 }

 const ConfigVision: FC<Props> = ({
+  isVisionModel,
+  readOnly,
  enabled,
  onEnabledChange,
-  config,
+  nodeId,
+  config = {
+    detail: Resolution.high,
+    valueSelector: [],
+  },
  onConfigChange,
 }) => {
  const { t } = useTranslation()

+  // Variable filter for the picker: accept only file-typed variables
+  // (a single file or an array of files).
+  const filterVar = useCallback(({ type }: Var) => (
+    type === VarType.file || type === VarType.arrayFile
+  ), [])
  const handleVisionResolutionChange = useCallback((resolution: Resolution) => {
+    // Build the next config with immer (only `detail` is set to the picked
+    // resolution) and report it to the parent through onConfigChange.
    const newConfig = produce(config, (draft) => {
-      draft.resolution = resolution
+      draft.detail = resolution
+    })
+    onConfigChange(newConfig)
+  }, [config, onConfigChange])
+
+  // Store the variable chosen in the picker as the vision input selector.
+  // The picker's onChange signature also allows a plain string; only an array
+  // selector is valid for VisionSetting, so a string is ignored here instead
+  // of being force-cast into the config.
+  const handleVarSelectorChange = useCallback((valueSelector: ValueSelector | string) => {
+    if (!Array.isArray(valueSelector))
+      return
+    // Bind the narrowed value to a const so the narrowing survives inside the immer callback.
+    const selector: ValueSelector = valueSelector
+    const newConfig = produce(config, (draft) => {
+      draft.valueSelector = selector
    })
    onConfigChange(newConfig)
  }, [config, onConfigChange])
@ -37,15 +58,29 @@ const ConfigVision: FC<Props> = ({
      title={t(`${i18nPrefix}.vision`)}
      tooltip={t('appDebug.vision.description')!}
      operations={
-        <Switch size='md' defaultValue={enabled} onChange={onEnabledChange} />
+        // NOTE(review): the "only supports vision models" tooltip wrapper is rendered
+        // for vision models as well. It presumably should be suppressed when
+        // isVisionModel is true (earlier note here: disabled={isVisionModel}) —
+        // confirm whether TooltipPlus accepts a `disabled` prop before wiring it up.
+        <TooltipPlus hideArrow popupContent={t('appDebug.vision.onlySupportVisionModelTip')!} >
+          <Switch disabled={readOnly || !isVisionModel} size='md' defaultValue={!isVisionModel ? false : enabled} onChange={onEnabledChange} />
+        </TooltipPlus>
      }
    >
-      {enabled
+      {/* Show the settings only when vision is switched on AND the model supports vision; */}
+      {/* for a non-vision model the switch is forced off, so no settings are shown. */}
+      {(enabled && isVisionModel)
        ? (
-          <ResolutionPicker
-            value={config.resolution}
-            onChange={handleVisionResolutionChange}
-          />
+          <div>
+            <VarReferencePicker
+              className='mb-4'
+              filterVar={filterVar}
+              nodeId={nodeId}
+              value={config.valueSelector || []}
+              onChange={handleVarSelectorChange}
+              readonly={readOnly}
+            />
+            <ResolutionPicker
+              value={config.detail}
+              onChange={handleVisionResolutionChange}
+            />
+          </div>
        )
        : null}

--- a/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
+++ b/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
@ -37,7 +37,7 @@ const TRIGGER_DEFAULT_WIDTH = 227
 type Props = {
  className?: string
  nodeId: string
-  isShowNodeName: boolean
+  isShowNodeName?: boolean
  readonly: boolean
  value: ValueSelector | string
  onChange: (value: ValueSelector | string, varKindType: VarKindType, varInfo?: Var) => void
@ -56,7 +56,7 @@ const VarReferencePicker: FC<Props> = ({
  nodeId,
  readonly,
  className,
-  isShowNodeName,
+  isShowNodeName = true,
  value,
  onOpen = () => { },
  onChange,
--- a/web/app/components/workflow/nodes/llm/panel.tsx
+++ b/web/app/components/workflow/nodes/llm/panel.tsx
@ -4,8 +4,8 @@ import { useTranslation } from 'react-i18next'
 import { RiQuestionLine } from '@remixicon/react'
 import MemoryConfig from '../_base/components/memory-config'
 import VarReferencePicker from '../_base/components/variable/var-reference-picker'
+import ConfigVision from '../_base/components/config-vision'
 import useConfig from './use-config'
-import ResolutionPicker from './components/resolution-picker'
 import type { LLMNodeType } from './types'
 import ConfigPrompt from './components/config-prompt'
 import VarList from '@/app/components/workflow/nodes/_base/components/variable/var-list'
@ -14,14 +14,13 @@ import Field from '@/app/components/workflow/nodes/_base/components/field'
 import Split from '@/app/components/workflow/nodes/_base/components/split'
 import ModelParameterModal from '@/app/components/header/account-setting/model-provider-page/model-parameter-modal'
 import OutputVars, { VarItem } from '@/app/components/workflow/nodes/_base/components/output-vars'
-import { Resolution } from '@/types/app'
 import { InputVarType, type NodePanelProps } from '@/app/components/workflow/types'
 import BeforeRunForm from '@/app/components/workflow/nodes/_base/components/before-run-form'
 import type { Props as FormProps } from '@/app/components/workflow/nodes/_base/components/before-run-form/form'
 import ResultPanel from '@/app/components/workflow/run/result-panel'
 import TooltipPlus from '@/app/components/base/tooltip-plus'
 import Editor from '@/app/components/workflow/nodes/_base/components/prompt/editor'
-import Switch from '@/app/components/base/switch'
+
 const i18nPrefix = 'workflow.nodes.llm'

 const Panel: FC<NodePanelProps<LLMNodeType>> = ({
@ -37,7 +36,7 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
    isChatMode,
    isCompletionModel,
    shouldShowContextTip,
-    isShowVisionConfig,
+    isVisionModel,
    handleModelChanged,
    hasSetBlockStatus,
    handleCompletionParamsChange,
@ -103,7 +102,7 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
      )
    }

-    if (isShowVisionConfig) {
+    if (isVisionModel) {
      forms.push(
        {
          label: t(`${i18nPrefix}.vision`)!,
@ -259,28 +258,15 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
        )}

        {/* Vision: GPT4-vision and so on */}
-        {isShowVisionConfig && (
-          <>
-            <Split />
-            <Field
-              title={t(`${i18nPrefix}.vision`)}
-              tooltip={t('appDebug.vision.description')!}
-              operations={
-                <Switch size='md' defaultValue={inputs.vision.enabled} onChange={handleVisionResolutionEnabledChange} />
-              }
-            >
-              {inputs.vision.enabled
-                ? (
-                  <ResolutionPicker
-                    value={inputs.vision.configs?.detail || Resolution.high}
-                    onChange={handleVisionResolutionChange}
-                  />
-                )
-                : null}
-
-            </Field>
-          </>
-        )}
+        <ConfigVision
+          nodeId={id}
+          readOnly={readOnly}
+          isVisionModel={isVisionModel}
+          enabled={inputs.vision.enabled}
+          onEnabledChange={handleVisionResolutionEnabledChange}
+          config={inputs.vision.configs}
+          onConfigChange={handleVisionResolutionChange}
+        />
      </div>
      <Split />
      <div className='px-4 pt-4 pb-2'>
--- a/web/app/components/workflow/nodes/llm/types.ts
+++ b/web/app/components/workflow/nodes/llm/types.ts
@ -1,5 +1,4 @@
-import type { Resolution } from '@/types/app'
-import type { CommonNodeType, Memory, ModelConfig, PromptItem, ValueSelector, Variable } from '@/app/components/workflow/types'
+import type { CommonNodeType, Memory, ModelConfig, PromptItem, ValueSelector, Variable, VisionSetting } from '@/app/components/workflow/types'

 export type LLMNodeType = CommonNodeType & {
  model: ModelConfig
@ -14,8 +13,6 @@ export type LLMNodeType = CommonNodeType & {
  }
  vision: {
    enabled: boolean
-    configs?: {
-      detail: Resolution
-    }
+    configs?: VisionSetting
  }
 }
--- a/web/app/components/workflow/nodes/llm/use-config.ts
+++ b/web/app/components/workflow/nodes/llm/use-config.ts
@ -1,7 +1,7 @@
 import { useCallback, useEffect, useRef, useState } from 'react'
 import produce from 'immer'
 import { EditionType, VarType } from '../../types'
-import type { Memory, PromptItem, ValueSelector, Var, Variable } from '../../types'
+import type { Memory, PromptItem, ValueSelector, Var, Variable, VisionSetting } from '../../types'
 import { useStore } from '../../store'
 import {
  useIsChatMode,
@ -147,13 +147,13 @@ const useConfig = (id: string, payload: LLMNodeType) => {
      model: model.name,
    },
  )
-  const isShowVisionConfig = !!currModel?.features?.includes(ModelFeatureEnum.vision)
+  const isVisionModel = !!currModel?.features?.includes(ModelFeatureEnum.vision)
  // change to vision model to set vision enabled, else disabled
  useEffect(() => {
    if (!modelChanged)
      return
    setModelChanged(false)
-    if (!isShowVisionConfig) {
+    if (!isVisionModel) {
      const newInputs = produce(inputs, (draft) => {
        draft.vision = {
          enabled: false,
@ -169,6 +169,7 @@ const useConfig = (id: string, payload: LLMNodeType) => {
            enabled: true,
            configs: {
              detail: Resolution.high,
+              valueSelector: [],
            },
          }
        }
@ -176,7 +177,7 @@ const useConfig = (id: string, payload: LLMNodeType) => {
      setInputs(newInputs)
    }
  // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [isShowVisionConfig, modelChanged])
+  }, [isVisionModel, modelChanged])

  // variables
  const isShowVars = (() => {
@ -298,31 +299,18 @@ const useConfig = (id: string, payload: LLMNodeType) => {
      if (!draft.vision) {
        draft.vision = {
          enabled,
-          configs: {
-            detail: Resolution.high,
-          },
        }
      }
      else {
        draft.vision.enabled = enabled
-        if (!draft.vision.configs) {
-          draft.vision.configs = {
-            detail: Resolution.high,
-          }
-        }
      }
    })
    setInputs(newInputs)
  }, [inputs, setInputs])

-  const handleVisionResolutionChange = useCallback((newResolution: Resolution) => {
+  const handleVisionResolutionChange = useCallback((config: VisionSetting) => {
+    // Despite the "Resolution" in its name, this now replaces the node's whole
+    // vision config (detail and valueSelector) with the value it receives.
+    // NOTE(review): assumes inputs.vision is already set — confirm against callers.
    const newInputs = produce(inputs, (draft) => {
-      if (!draft.vision.configs) {
-        draft.vision.configs = {
-          detail: Resolution.high,
-        }
-      }
-      draft.vision.configs.detail = newResolution
+      draft.vision.configs = config
    })
    setInputs(newInputs)
  }, [inputs, setInputs])
@ -425,7 +413,7 @@ const useConfig = (id: string, payload: LLMNodeType) => {
    isCompletionModel,
    hasSetBlockStatus,
    shouldShowContextTip,
-    isShowVisionConfig,
+    isVisionModel,
    handleModelChanged,
    handleCompletionParamsChange,
    isShowVars,
--- a/web/app/components/workflow/types.ts
+++ b/web/app/components/workflow/types.ts
@ -347,5 +347,5 @@ export type UploadFileSetting = {

 export type VisionSetting = {
+  // Selector of the variable that supplies the vision input; ConfigVision only
+  // offers file and file-array variables for it.
  valueSelector: ValueSelector
-  resolution: Resolution
+  // Resolution chosen in the resolution picker (ConfigVision defaults it to Resolution.high).
+  detail: Resolution
 }
--- a/web/i18n/en-US/app-debug.ts
+++ b/web/i18n/en-US/app-debug.ts
@ -356,6 +356,7 @@ const translation = {
  vision: {
    name: 'Vision',
    description: 'Enable Vision will allows the model to take in images and answer questions about them. ',
+    onlySupportVisionModelTip: 'Only supports vision models',
    settings: 'Settings',
    visionSettings: {
      title: 'Vision Settings',
--- a/web/i18n/zh-Hans/app-debug.ts
+++ b/web/i18n/zh-Hans/app-debug.ts
@ -351,6 +351,7 @@ const translation = {
  vision: {
    name: '视觉',
    description: '开启视觉功能将允许模型输入图片，并根据图像内容的理解回答用户问题',
+    onlySupportVisionModelTip: '只有视觉模型配置视觉功能',
    settings: '设置',
    visionSettings: {
      title: '视觉设置',