feat: llm support vision

2025-08-20 12:39:14 +08:00 · 2024-08-07 11:29:20 +08:00 · 2024-08-07 11:29:20 +08:00 · 0455e4e1a5
commit 0455e4e1a5
parent 251ab5418f
8 changed files with 74 additions and 66 deletions
--- a/web/app/components/workflow/nodes/_base/components/config-vision.tsx
+++ b/web/app/components/workflow/nodes/_base/components/config-vision.tsx
@ -3,31 +3,52 @@ import type { FC } from 'react'
 import React, { useCallback } from 'react'
 import { useTranslation } from 'react-i18next'
 import produce from 'immer'
 import VarReferencePicker from './variable/var-reference-picker'
 import ResolutionPicker from '@/app/components/workflow/nodes/llm/components/resolution-picker'
 import Field from '@/app/components/workflow/nodes/_base/components/field'
 import Switch from '@/app/components/base/switch'
-import type { VisionSetting } from '@/app/components/workflow/types'
+import { type ValueSelector, type Var, VarType, type VisionSetting } from '@/app/components/workflow/types'
-import type { Resolution } from '@/types/app'
+import { Resolution } from '@/types/app'
 import TooltipPlus from '@/app/components/base/tooltip-plus'
 const i18nPrefix = 'workflow.nodes.llm'
 type Props = {
  isVisionModel: boolean
  readOnly: boolean
  enabled: boolean
  onEnabledChange: (enabled: boolean) => void
-  config: VisionSetting
+  nodeId: string
  config?: VisionSetting
  onConfigChange: (config: VisionSetting) => void
 }
 const ConfigVision: FC<Props> = ({
  isVisionModel,
  readOnly,
  enabled,
  onEnabledChange,
-  config,
+  nodeId,
  config = {
    detail: Resolution.high,
    valueSelector: [],
  },
  onConfigChange,
 }) => {
  const { t } = useTranslation()
  const filterVar = useCallback((payload: Var) => {
    return [VarType.file, VarType.arrayFile].includes(payload.type)
  }, [])
  const handleVisionResolutionChange = useCallback((resolution: Resolution) => {
    const newConfig = produce(config, (draft) => {
-      draft.resolution = resolution
+      draft.detail = resolution
    })
    onConfigChange(newConfig)
  }, [config, onConfigChange])
  const handleVarSelectorChange = useCallback((valueSelector: ValueSelector | string) => {
    const newConfig = produce(config, (draft) => {
      draft.valueSelector = valueSelector as ValueSelector
    })
    onConfigChange(newConfig)
  }, [config, onConfigChange])
@ -37,15 +58,29 @@ const ConfigVision: FC<Props> = ({
      title={t(`${i18nPrefix}.vision`)}
      tooltip={t('appDebug.vision.description')!}
      operations={
-        <Switch size='md' defaultValue={enabled} onChange={onEnabledChange} />
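+        // NOTE(review): looks like the tip should be suppressed for vision models via disabled={isVisionModel} on TooltipPlus — confirm TooltipPlus accepts a `disabled` prop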
        <TooltipPlus hideArrow popupContent={t('appDebug.vision.onlySupportVisionModelTip')!} >
          <Switch disabled={readOnly || !isVisionModel} size='md' defaultValue={!isVisionModel ? false : enabled} onChange={onEnabledChange} />
        </TooltipPlus>
      }
    >
-      {enabled
+      {(enabled || !isVisionModel)
        ? (
-          <ResolutionPicker
+          <div>
-            value={config.resolution}
+            <VarReferencePicker
-            onChange={handleVisionResolutionChange}
+              className='mb-4'
-          />
+              filterVar={filterVar}
              nodeId={nodeId}
              value={config.valueSelector || []}
              onChange={handleVarSelectorChange}
              readonly={readOnly}
            />
            <ResolutionPicker
              value={config.detail}
              onChange={handleVisionResolutionChange}
            />
          </div>
        )
        : null}
--- a/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
+++ b/web/app/components/workflow/nodes/_base/components/variable/var-reference-picker.tsx
@ -37,7 +37,7 @@ const TRIGGER_DEFAULT_WIDTH = 227
 type Props = {
  className?: string
  nodeId: string
-  isShowNodeName: boolean
+  isShowNodeName?: boolean
  readonly: boolean
  value: ValueSelector | string
  onChange: (value: ValueSelector | string, varKindType: VarKindType, varInfo?: Var) => void
@ -56,7 +56,7 @@ const VarReferencePicker: FC<Props> = ({
  nodeId,
  readonly,
  className,
-  isShowNodeName,
+  isShowNodeName = true,
  value,
  onOpen = () => { },
  onChange,
--- a/web/app/components/workflow/nodes/llm/panel.tsx
+++ b/web/app/components/workflow/nodes/llm/panel.tsx
@ -4,8 +4,8 @@ import { useTranslation } from 'react-i18next'
 import { RiQuestionLine } from '@remixicon/react'
 import MemoryConfig from '../_base/components/memory-config'
 import VarReferencePicker from '../_base/components/variable/var-reference-picker'
 import ConfigVision from '../_base/components/config-vision'
 import useConfig from './use-config'
 import ResolutionPicker from './components/resolution-picker'
 import type { LLMNodeType } from './types'
 import ConfigPrompt from './components/config-prompt'
 import VarList from '@/app/components/workflow/nodes/_base/components/variable/var-list'
@ -14,14 +14,13 @@ import Field from '@/app/components/workflow/nodes/_base/components/field'
 import Split from '@/app/components/workflow/nodes/_base/components/split'
 import ModelParameterModal from '@/app/components/header/account-setting/model-provider-page/model-parameter-modal'
 import OutputVars, { VarItem } from '@/app/components/workflow/nodes/_base/components/output-vars'
 import { Resolution } from '@/types/app'
 import { InputVarType, type NodePanelProps } from '@/app/components/workflow/types'
 import BeforeRunForm from '@/app/components/workflow/nodes/_base/components/before-run-form'
 import type { Props as FormProps } from '@/app/components/workflow/nodes/_base/components/before-run-form/form'
 import ResultPanel from '@/app/components/workflow/run/result-panel'
 import TooltipPlus from '@/app/components/base/tooltip-plus'
 import Editor from '@/app/components/workflow/nodes/_base/components/prompt/editor'
-import Switch from '@/app/components/base/switch'
+
 const i18nPrefix = 'workflow.nodes.llm'
 const Panel: FC<NodePanelProps<LLMNodeType>> = ({
@ -37,7 +36,7 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
    isChatMode,
    isCompletionModel,
    shouldShowContextTip,
-    isShowVisionConfig,
+    isVisionModel,
    handleModelChanged,
    hasSetBlockStatus,
    handleCompletionParamsChange,
@ -103,7 +102,7 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
      )
    }
-    if (isShowVisionConfig) {
+    if (isVisionModel) {
      forms.push(
        {
          label: t(`${i18nPrefix}.vision`)!,
@ -259,28 +258,15 @@ const Panel: FC<NodePanelProps<LLMNodeType>> = ({
        )}
        {/* Vision: GPT4-vision and so on */}
-        {isShowVisionConfig && (
+        <ConfigVision
-          <>
+          nodeId={id}
-            <Split />
+          readOnly={readOnly}
-            <Field
+          isVisionModel={isVisionModel}
-              title={t(`${i18nPrefix}.vision`)}
+          enabled={inputs.vision.enabled}
-              tooltip={t('appDebug.vision.description')!}
+          onEnabledChange={handleVisionResolutionEnabledChange}
-              operations={
+          config={inputs.vision.configs}
-                <Switch size='md' defaultValue={inputs.vision.enabled} onChange={handleVisionResolutionEnabledChange} />
+          onConfigChange={handleVisionResolutionChange}
-              }
+        />
            >
              {inputs.vision.enabled
                ? (
                  <ResolutionPicker
                    value={inputs.vision.configs?.detail || Resolution.high}
                    onChange={handleVisionResolutionChange}
                  />
                )
                : null}
            </Field>
          </>
        )}
      </div>
      <Split />
      <div className='px-4 pt-4 pb-2'>
--- a/web/app/components/workflow/nodes/llm/types.ts
+++ b/web/app/components/workflow/nodes/llm/types.ts
@ -1,5 +1,4 @@
-import type { Resolution } from '@/types/app'
+import type { CommonNodeType, Memory, ModelConfig, PromptItem, ValueSelector, Variable, VisionSetting } from '@/app/components/workflow/types'
 import type { CommonNodeType, Memory, ModelConfig, PromptItem, ValueSelector, Variable } from '@/app/components/workflow/types'
 export type LLMNodeType = CommonNodeType & {
  model: ModelConfig
@ -14,8 +13,6 @@ export type LLMNodeType = CommonNodeType & {
  }
  vision: {
    enabled: boolean
-    configs?: {
+    configs?: VisionSetting
      detail: Resolution
    }
  }
 }
--- a/web/app/components/workflow/nodes/llm/use-config.ts
+++ b/web/app/components/workflow/nodes/llm/use-config.ts
@ -1,7 +1,7 @@
 import { useCallback, useEffect, useRef, useState } from 'react'
 import produce from 'immer'
 import { EditionType, VarType } from '../../types'
-import type { Memory, PromptItem, ValueSelector, Var, Variable } from '../../types'
+import type { Memory, PromptItem, ValueSelector, Var, Variable, VisionSetting } from '../../types'
 import { useStore } from '../../store'
 import {
  useIsChatMode,
@ -147,13 +147,13 @@ const useConfig = (id: string, payload: LLMNodeType) => {
      model: model.name,
    },
  )
-  const isShowVisionConfig = !!currModel?.features?.includes(ModelFeatureEnum.vision)
+  const isVisionModel = !!currModel?.features?.includes(ModelFeatureEnum.vision)
  // When the model changes: enable vision for a vision-capable model, otherwise disable it
  useEffect(() => {
    if (!modelChanged)
      return
    setModelChanged(false)
-    if (!isShowVisionConfig) {
+    if (!isVisionModel) {
      const newInputs = produce(inputs, (draft) => {
        draft.vision = {
          enabled: false,
@ -169,6 +169,7 @@ const useConfig = (id: string, payload: LLMNodeType) => {
            enabled: true,
            configs: {
              detail: Resolution.high,
              valueSelector: [],
            },
          }
        }
@ -176,7 +177,7 @@ const useConfig = (id: string, payload: LLMNodeType) => {
      setInputs(newInputs)
    }
  // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [isShowVisionConfig, modelChanged])
+  }, [isVisionModel, modelChanged])
  // variables
  const isShowVars = (() => {
@ -298,31 +299,18 @@ const useConfig = (id: string, payload: LLMNodeType) => {
      if (!draft.vision) {
        draft.vision = {
          enabled,
          configs: {
            detail: Resolution.high,
          },
        }
      }
      else {
        draft.vision.enabled = enabled
        if (!draft.vision.configs) {
          draft.vision.configs = {
            detail: Resolution.high,
          }
        }
      }
    })
    setInputs(newInputs)
  }, [inputs, setInputs])
-  const handleVisionResolutionChange = useCallback((newResolution: Resolution) => {
+  const handleVisionResolutionChange = useCallback((config: VisionSetting) => {
    const newInputs = produce(inputs, (draft) => {
-      if (!draft.vision.configs) {
+      draft.vision.configs = config
        draft.vision.configs = {
          detail: Resolution.high,
        }
      }
      draft.vision.configs.detail = newResolution
    })
    setInputs(newInputs)
  }, [inputs, setInputs])
@ -425,7 +413,7 @@ const useConfig = (id: string, payload: LLMNodeType) => {
    isCompletionModel,
    hasSetBlockStatus,
    shouldShowContextTip,
-    isShowVisionConfig,
+    isVisionModel,
    handleModelChanged,
    handleCompletionParamsChange,
    isShowVars,
--- a/web/app/components/workflow/types.ts
+++ b/web/app/components/workflow/types.ts
@ -347,5 +347,5 @@ export type UploadFileSetting = {
 export type VisionSetting = {
  valueSelector: ValueSelector
-  resolution: Resolution
+  detail: Resolution
 }
--- a/web/i18n/en-US/app-debug.ts
+++ b/web/i18n/en-US/app-debug.ts
@ -356,6 +356,7 @@ const translation = {
  vision: {
    name: 'Vision',
    description: 'Enable Vision will allows the model to take in images and answer questions about them. ',
    onlySupportVisionModelTip: 'Only supports vision models',
    settings: 'Settings',
    visionSettings: {
      title: 'Vision Settings',
--- a/web/i18n/zh-Hans/app-debug.ts
+++ b/web/i18n/zh-Hans/app-debug.ts
@ -351,6 +351,7 @@ const translation = {
  vision: {
    name: '视觉',
    description: '开启视觉功能将允许模型输入图片，并根据图像内容的理解回答用户问题',
    onlySupportVisionModelTip: '只有视觉模型配置视觉功能',
    settings: '设置',
    visionSettings: {
      title: '视觉设置',