+
{/* {JSON.stringify(inputs, null, 2)} */}
> = ({
/>
-
+
+
+
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts
index 1b85bfc0b5..07db5dba15 100644
--- a/web/app/components/workflow/nodes/knowledge-retrieval/types.ts
+++ b/web/app/components/workflow/nodes/knowledge-retrieval/types.ts
@@ -1,7 +1,14 @@
-import type { CommonNodeType, ModelConfig, ValueSelector } from '@/app/components/workflow/types'
+import type {
+ CommonNodeType,
+ ModelConfig,
+ Node,
+ NodeOutPutVar,
+ ValueSelector,
+} from '@/app/components/workflow/types'
import type { RETRIEVE_TYPE } from '@/types/app'
import type {
DataSet,
+ MetadataInDoc,
RerankingModeEnum,
} from '@/models/datasets'
@@ -30,6 +37,61 @@ export type SingleRetrievalConfig = {
model: ModelConfig
}
+export enum LogicalOperator { // string enum; the type of MetadataFilteringConditions.logical_operator
+ and = 'and',
+ or = 'or',
+}
+
+export enum ComparisonOperator { // string enum; the type of MetadataFilteringCondition.comparison_operator
+ contains = 'contains',
+ notContains = 'not contains',
+ startWith = 'start with',
+ endWith = 'end with',
+ is = 'is', // operator handleAddCondition assigns to a new condition unless the metadata type is number
+ isNot = 'is not',
+ empty = 'empty',
+ notEmpty = 'not empty',
+ equal = '=', // operator handleAddCondition assigns to a new condition for number metadata
+ notEqual = '≠',
+ largerThan = '>',
+ lessThan = '<',
+ largerThanOrEqual = '≥',
+ lessThanOrEqual = '≤',
+ isNull = 'is null',
+ isNotNull = 'is not null',
+ in = 'in',
+ notIn = 'not in',
+ allOf = 'all of',
+ exists = 'exists',
+ notExists = 'not exists',
+ before = 'before',
+ after = 'after',
+}
+
+export enum MetadataFilteringModeEnum { // string enum; the type of KnowledgeRetrievalNodeType.metadata_filtering_mode
+ disabled = 'disabled',
+ automatic = 'automatic',
+ manual = 'manual',
+}
+
+export enum MetadataFilteringVariableType { // string enum; the type of MetadataInDoc.type in models/datasets
+ string = 'string',
+ number = 'number',
+ time = 'time',
+}
+
+export type MetadataFilteringCondition = { // one entry of MetadataFilteringConditions.conditions
+ id: string // handleAddCondition fills this with uuid4()
+ name: string // handleAddCondition copies this from the metadata item's name
+ comparison_operator: ComparisonOperator
+ value?: string | number // optional: handleAddCondition creates conditions without a value
+}
+
+export type MetadataFilteringConditions = { // a list of conditions plus the operator stored alongside them
+ logical_operator: LogicalOperator // handleAddCondition initialises this to LogicalOperator.and
+ conditions: MetadataFilteringCondition[]
+}
+
export type KnowledgeRetrievalNodeType = CommonNodeType & {
query_variable_selector: ValueSelector
dataset_ids: string[]
@@ -37,4 +99,32 @@ export type KnowledgeRetrievalNodeType = CommonNodeType & {
multiple_retrieval_config?: MultipleRetrievalConfig
single_retrieval_config?: SingleRetrievalConfig
_datasets?: DataSet[]
+ metadata_filtering_mode?: MetadataFilteringModeEnum
+ metadata_filtering_conditions?: MetadataFilteringConditions
+ metadata_model_config?: ModelConfig
+}
+
+export type HandleAddCondition = (metadataItem: MetadataInDoc) => void // useConfig's implementation reads the item's name and type
+export type HandleRemoveCondition = (id: string) => void // id is matched against MetadataFilteringCondition.id
+export type HandleUpdateCondition = (id: string, newCondition: MetadataFilteringCondition) => void // newCondition replaces the condition with that id
+export type HandleToggleConditionLogicalOperator = () => void // useConfig's implementation switches logical_operator between and/or
+
+export type MetadataShape = { // metadata-filtering state, handlers and available variables grouped into one shape
+  metadataList?: MetadataInDoc[]
+  selectedDatasetsLoaded?: boolean
+  metadataFilteringConditions?: MetadataFilteringConditions
+  handleAddCondition: HandleAddCondition
+  handleRemoveCondition: HandleRemoveCondition
+  handleToggleConditionLogicalOperator: HandleToggleConditionLogicalOperator
+  handleUpdateCondition: HandleUpdateCondition
+  metadataModelConfig?: ModelConfig
+  handleMetadataModelChange?: (model: { modelId: string; provider: string; mode?: string; features?: string[] }) => void
+  handleMetadataCompletionParamsChange?: (params: Record<string, any>) => void // `Record` needs explicit key/value type arguments
+  availableStringVars?: NodeOutPutVar[]
+  availableStringNodesWithParent?: Node[]
+  availableNumberVars?: NodeOutPutVar[]
+  availableNumberNodesWithParent?: Node[]
+  isCommonVariable?: boolean
+  availableCommonStringVars?: { name: string; type: string; }[]
+  availableCommonNumberVars?: { name: string; type: string; }[]
 }
diff --git a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts
index 6b09c611f8..09b730e71b 100644
--- a/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts
+++ b/web/app/components/workflow/nodes/knowledge-retrieval/use-config.ts
@@ -6,13 +6,28 @@ import {
} from 'react'
import produce from 'immer'
import { isEqual } from 'lodash-es'
+import { v4 as uuid4 } from 'uuid'
import type { ValueSelector, Var } from '../../types'
import { BlockEnum, VarType } from '../../types'
import {
- useIsChatMode, useNodesReadOnly,
+ useIsChatMode,
+ useNodesReadOnly,
useWorkflow,
} from '../../hooks'
-import type { KnowledgeRetrievalNodeType, MultipleRetrievalConfig } from './types'
+import type {
+ HandleAddCondition,
+ HandleRemoveCondition,
+ HandleToggleConditionLogicalOperator,
+ HandleUpdateCondition,
+ KnowledgeRetrievalNodeType,
+ MetadataFilteringModeEnum,
+ MultipleRetrievalConfig,
+} from './types'
+import {
+ ComparisonOperator,
+ LogicalOperator,
+ MetadataFilteringVariableType,
+} from './types'
import {
getMultipleRetrievalConfig,
getSelectedDatasetsMode,
@@ -25,6 +40,7 @@ import useNodeCrud from '@/app/components/workflow/nodes/_base/hooks/use-node-cr
import useOneStepRun from '@/app/components/workflow/nodes/_base/hooks/use-one-step-run'
import { useCurrentProviderAndModel, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
+import useAvailableVarList from '@/app/components/workflow/nodes/_base/hooks/use-available-var-list'
const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
const { nodesReadOnly: readOnly } = useNodesReadOnly()
@@ -196,13 +212,14 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
setInputs(newInputs)
}, [inputs, setInputs, selectedDatasets, currentRerankModel, currentRerankProvider])
+ const [selectedDatasetsLoaded, setSelectedDatasetsLoaded] = useState(false)
// datasets
useEffect(() => {
(async () => {
const inputs = inputRef.current
const datasetIds = inputs.dataset_ids
if (datasetIds?.length > 0) {
- const { data: dataSetsWithDetail } = await fetchDatasets({ url: '/datasets', params: { page: 1, ids: datasetIds } })
+ const { data: dataSetsWithDetail } = await fetchDatasets({ url: '/datasets', params: { page: 1, ids: datasetIds } as any })
setSelectedDatasets(dataSetsWithDetail)
}
const newInputs = produce(inputs, (draft) => {
@@ -210,6 +227,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
draft._datasets = selectedDatasets
})
setInputs(newInputs)
+ setSelectedDatasetsLoaded(true)
})()
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
@@ -287,6 +305,113 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
})
}, [runInputData, setRunInputData])
+  // Write the given metadata filtering mode onto a copy of the current inputs and store it.
+  const handleMetadataFilterModeChange = useCallback((newMode: MetadataFilteringModeEnum) => {
+    const nextInputs = produce(inputRef.current, (draft) => { draft.metadata_filtering_mode = newMode })
+    setInputs(nextInputs)
+  }, [setInputs])
+
+  // Append a condition for the picked metadata item; the first condition also creates the group
+  // with LogicalOperator.and. The HandleAddCondition type argument types the destructured fields.
+  const handleAddCondition = useCallback<HandleAddCondition>(({ name, type }) => {
+    // Number metadata starts with '=', every other type with 'is'.
+    const operator = type === MetadataFilteringVariableType.number
+      ? ComparisonOperator.equal
+      : ComparisonOperator.is
+    const newCondition = {
+      id: uuid4(),
+      name,
+      comparison_operator: operator,
+    }
+    const newInputs = produce(inputRef.current, (draft) => {
+      if (draft.metadata_filtering_conditions) {
+        draft.metadata_filtering_conditions.conditions.push(newCondition)
+      }
+      else {
+        draft.metadata_filtering_conditions = {
+          logical_operator: LogicalOperator.and,
+          conditions: [newCondition],
+        }
+      }
+    })
+    setInputs(newInputs)
+  }, [setInputs])
+
+  // Remove the condition whose id matches; an unknown id leaves the list unchanged.
+  const handleRemoveCondition = useCallback<HandleRemoveCondition>((id) => {
+    setInputs(produce(inputRef.current, (draft) => {
+      const conditions = draft.metadata_filtering_conditions?.conditions ?? []
+      const index = conditions.findIndex(c => c.id === id)
+      if (index > -1)
+        conditions.splice(index, 1)
+    }))
+  }, [setInputs])
+
+  // Replace the condition whose id matches with newCondition; an unknown id changes nothing.
+  const handleUpdateCondition = useCallback<HandleUpdateCondition>((id, newCondition) => {
+    setInputs(produce(inputRef.current, (draft) => {
+      const conditions = draft.metadata_filtering_conditions?.conditions ?? []
+      const index = conditions.findIndex(c => c.id === id)
+      if (index > -1)
+        conditions[index] = newCondition
+    }))
+  }, [setInputs])
+
+  // Switch the condition group's logical operator between `and` and `or`.
+  const handleToggleConditionLogicalOperator = useCallback<HandleToggleConditionLogicalOperator>(() => {
+    setInputs(produce(inputRef.current, (draft) => {
+      const group = draft.metadata_filtering_conditions
+      if (group) // nothing to toggle until a condition group exists; writing to a missing group would throw
+        group.logical_operator = group.logical_operator === LogicalOperator.and ? LogicalOperator.or : LogicalOperator.and
+    }))
+  }, [setInputs])
+
+  // Set the metadata model; existing completion params are kept, otherwise { temperature: 0.7 } is used.
+  const handleMetadataModelChange = useCallback(({ provider, modelId, mode }: { provider: string; modelId: string; mode?: string }) => {
+    setInputs(produce(inputRef.current, (draft) => {
+      draft.metadata_model_config = {
+        provider,
+        name: modelId,
+        mode: mode || 'chat',
+        completion_params: draft.metadata_model_config?.completion_params || { temperature: 0.7 },
+      }
+    }))
+  }, [setInputs])
+
+  // Replace the metadata model's completion params while keeping its other fields.
+  const handleMetadataCompletionParamsChange = useCallback((newParams: Record<string, any>) => { // `Record` needs type arguments
+    setInputs(produce(inputRef.current, (draft) => {
+      draft.metadata_model_config = {
+        ...draft.metadata_model_config!,
+        completion_params: newParams,
+      }
+    }))
+  }, [setInputs])
+
+  // Vars and nodes reported by useAvailableVarList for this node, restricted to VarType.string.
+  const filterStringVar = useCallback((varPayload: Var) => {
+    return varPayload.type === VarType.string
+  }, [])
+
+  const stringVarList = useAvailableVarList(id, {
+    onlyLeafNodeVar: false,
+    filterVar: filterStringVar,
+  })
+  const availableStringVars = stringVarList.availableVars
+  const availableStringNodesWithParent = stringVarList.availableNodesWithParent
+
+  // Vars and nodes reported by useAvailableVarList for this node, restricted to VarType.number.
+  const filterNumberVar = useCallback((varPayload: Var) => {
+    return varPayload.type === VarType.number
+  }, [])
+
+  const numberVarList = useAvailableVarList(id, {
+    onlyLeafNodeVar: false,
+    filterVar: filterNumberVar,
+  })
+  const availableNumberVars = numberVarList.availableVars
+  const availableNumberNodesWithParent = numberVarList.availableNodesWithParent
+
return {
readOnly,
inputs,
@@ -297,6 +422,7 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
handleModelChanged,
handleCompletionParamsChange,
selectedDatasets: selectedDatasets.filter(d => d.name),
+ selectedDatasetsLoaded,
handleOnDatasetsChange,
isShowSingleRun,
hideSingleRun,
@@ -308,6 +434,17 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
runResult,
rerankModelOpen,
setRerankModelOpen,
+ handleMetadataFilterModeChange,
+ handleUpdateCondition,
+ handleAddCondition,
+ handleRemoveCondition,
+ handleToggleConditionLogicalOperator,
+ handleMetadataModelChange,
+ handleMetadataCompletionParamsChange,
+ availableStringVars,
+ availableStringNodesWithParent,
+ availableNumberVars,
+ availableNumberNodesWithParent,
}
}
diff --git a/web/context/debug-configuration.ts b/web/context/debug-configuration.ts
index 61508278d0..3564a0f822 100644
--- a/web/context/debug-configuration.ts
+++ b/web/context/debug-configuration.ts
@@ -1,3 +1,4 @@
+import type { RefObject } from 'react'
import { createContext, useContext } from 'use-context-selector'
import { PromptMode } from '@/models/debug'
import type {
@@ -92,6 +93,7 @@ type IDebugConfiguration = {
showSelectDataSet: () => void
// dataset config
datasetConfigs: DatasetConfigs
+ datasetConfigsRef: RefObject
setDatasetConfigs: (config: DatasetConfigs) => void
hasSetContextVar: boolean
isShowVisionConfig: boolean
@@ -236,6 +238,9 @@ const DebugConfigurationContext = createContext({
datasets: [],
},
},
+ datasetConfigsRef: {
+ current: null,
+ },
setDatasetConfigs: () => { },
hasSetContextVar: false,
isShowVisionConfig: false,
diff --git a/web/hooks/use-metadata.ts b/web/hooks/use-metadata.ts
index 9a078e639a..ee02b312f0 100644
--- a/web/hooks/use-metadata.ts
+++ b/web/hooks/use-metadata.ts
@@ -8,24 +8,24 @@ export type inputType = 'input' | 'select' | 'textarea'
export type metadataType = DocType | 'originInfo' | 'technicalParameters'
type MetadataMap =
- Record<
- metadataType,
- {
- text: string
- allowEdit?: boolean
- icon?: React.ReactNode
- iconName?: string
- subFieldsMap: Record<
- string,
- {
- label: string
- inputType?: inputType
- field?: string
- render?: (value: any, total?: number) => React.ReactNode | string
- }
- >
- }
- >
+ Record<
+ metadataType,
+ {
+ text: string
+ allowEdit?: boolean
+ icon?: React.ReactNode
+ iconName?: string
+ subFieldsMap: Record<
+ string,
+ {
+ label: string
+ inputType?: inputType
+ field?: string
+ render?: (value: any, total?: number) => React.ReactNode | string
+ }
+ >
+ }
+ >
const fieldPrefix = 'datasetDocuments.metadata.field'
@@ -240,7 +240,7 @@ export const useMetadataMap = (): MetadataMap => {
},
'data_source_type': {
label: t(`${fieldPrefix}.originInfo.source`),
- render: value => t(`datasetDocuments.metadata.source.${value}`),
+ render: value => t(`datasetDocuments.metadata.source.${value === 'notion_import' ? 'notion' : value}`),
},
},
},
diff --git a/web/i18n/en-US/billing.ts b/web/i18n/en-US/billing.ts
index cbc83d12a0..ea84927c07 100644
--- a/web/i18n/en-US/billing.ts
+++ b/web/i18n/en-US/billing.ts
@@ -56,6 +56,7 @@ const translation = {
documentsRequestQuota: '{{count,number}}/min Knowledge Request Rate Limit',
documentsRequestQuotaTooltip: 'Specifies the total number of actions a workspace can perform per minute within the knowledge base, including dataset creation, deletion, updates, document uploads, modifications, archiving, and knowledge base queries. This metric is used to evaluate the performance of knowledge base requests. For example, if a Sandbox user performs 10 consecutive hit tests within one minute, their workspace will be temporarily restricted from performing the following actions for the next minute: dataset creation, deletion, updates, and document uploads or modifications. ',
documentProcessingPriority: ' Document Processing',
+ documentProcessingPriorityUpgrade: 'Process more data with higher accuracy at faster speeds.',
priority: {
'standard': 'Standard',
'priority': 'Priority',
diff --git a/web/i18n/en-US/dataset.ts b/web/i18n/en-US/dataset.ts
index 4e1f2549d8..3e251d1bf1 100644
--- a/web/i18n/en-US/dataset.ts
+++ b/web/i18n/en-US/dataset.ts
@@ -168,6 +168,54 @@ const translation = {
preprocessDocument: '{{num}} Preprocess Documents',
allKnowledge: 'All Knowledge',
allKnowledgeDescription: 'Select to display all knowledge in this workspace. Only the Workspace Owner can manage all knowledge.',
+ embeddingModelNotAvailable: 'Embedding model is unavailable.',
+ metadata: {
+ metadata: 'Metadata',
+ addMetadata: 'Add Metadata',
+ chooseTime: 'Choose a time...',
+ createMetadata: {
+ title: 'New Metadata',
+ back: 'Back',
+ type: 'Type',
+ name: 'Name',
+ namePlaceholder: 'Add metadata name',
+ },
+ checkName: {
+ empty: 'Metadata name cannot be empty',
+ invalid: 'Metadata name can only contain lowercase letters, numbers, and underscores and must start with a lowercase letter',
+ },
+ batchEditMetadata: {
+ editMetadata: 'Edit Metadata',
+ editDocumentsNum: 'Editing {{num}} documents',
+ applyToAllSelectDocument: 'Apply to all selected documents',
+ applyToAllSelectDocumentTip: 'Automatically create all the above edited and new metadata for all selected documents, otherwise editing metadata will only apply to documents with it.',
+ multipleValue: 'Multiple Value',
+ },
+ selectMetadata: {
+ search: 'Search metadata',
+ newAction: 'New Metadata',
+ manageAction: 'Manage',
+ },
+ datasetMetadata: {
+ description: 'You can manage all metadata in this knowledge here. Modifications will be synchronized to every document.',
+ addMetaData: 'Add Metadata',
+ values: '{{num}} Values',
+ disabled: 'Disabled',
+ rename: 'Rename',
+ name: 'Name',
+ namePlaceholder: 'Metadata name',
+ builtIn: 'Built-in',
+ builtInDescription: 'Built-in metadata is automatically extracted and generated. It must be enabled before use and cannot be edited.',
+ deleteTitle: 'Confirm to delete',
+ deleteContent: 'Are you sure you want to delete the metadata "{{name}}"',
+ },
+ documentMetadata: {
+ metadataToolTip: 'Metadata serves as a critical filter that enhances the accuracy and relevance of information retrieval. You can modify and add metadata for this document here.',
+ startLabeling: 'Start Labeling',
+ documentInformation: 'Document Information',
+ technicalParameters: 'Technical Parameters',
+ },
+ },
}
export default translation
diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts
index a6dd4deb73..c81d9f8e94 100644
--- a/web/i18n/en-US/workflow.ts
+++ b/web/i18n/en-US/workflow.ts
@@ -429,6 +429,34 @@ const translation = {
url: 'Segmented URL',
metadata: 'Other metadata',
},
+ metadata: {
+ title: 'Metadata Filtering',
+ tip: 'Metadata filtering is the process of using metadata attributes (such as tags, categories, or access permissions) to refine and control the retrieval of relevant information within a system.',
+ options: {
+ disabled: {
+ title: 'Disabled',
+ subTitle: 'Not enabling metadata filtering',
+ },
+ automatic: {
+ title: 'Automatic',
+ subTitle: 'Automatically generate metadata filtering conditions based on user query',
+ desc: 'Automatically generate metadata filtering conditions based on Query Variable',
+ },
+ manual: {
+ title: 'Manual',
+ subTitle: 'Manually add metadata filtering conditions',
+ },
+ },
+ panel: {
+ title: 'Metadata Filter Conditions',
+ conditions: 'Conditions',
+ add: 'Add Condition',
+ search: 'Search metadata',
+ placeholder: 'Enter value',
+ datePlaceholder: 'Choose a time...',
+ select: 'Select variable...',
+ },
+ },
},
http: {
inputVars: 'Input Variables',
@@ -517,6 +545,8 @@ const translation = {
'all of': 'all of',
'exists': 'exists',
'not exists': 'not exists',
+ 'before': 'before',
+ 'after': 'after',
},
optionName: {
image: 'Image',
diff --git a/web/i18n/ja-JP/billing.ts b/web/i18n/ja-JP/billing.ts
index 796043721c..91979cb46a 100644
--- a/web/i18n/ja-JP/billing.ts
+++ b/web/i18n/ja-JP/billing.ts
@@ -55,6 +55,7 @@ const translation = {
documentsRequestQuota: '{{count,number}}/分のナレッジ リクエストのレート制限',
documentsRequestQuotaTooltip: 'ナレッジベース内でワークスペースが1分間に実行できる操作の総数を示します。これには、データセットの作成、削除、更新、ドキュメントのアップロード、修正、アーカイブ、およびナレッジベースクエリが含まれます。この指標は、ナレッジベースリクエストのパフォーマンスを評価するために使用されます。例えば、Sandbox ユーザーが1分間に10回連続でヒットテストを実行した場合、そのワークスペースは次の1分間、データセットの作成、削除、更新、ドキュメントのアップロードや修正などの操作を一時的に実行できなくなります。',
documentProcessingPriority: '文書処理',
+ documentProcessingPriorityUpgrade: 'より高い精度と高速な速度でデータを処理します。',
priority: {
'standard': '標準',
'priority': '優先',
diff --git a/web/i18n/zh-Hans/billing.ts b/web/i18n/zh-Hans/billing.ts
index 5040ca0a59..8b6826bb32 100644
--- a/web/i18n/zh-Hans/billing.ts
+++ b/web/i18n/zh-Hans/billing.ts
@@ -55,6 +55,7 @@ const translation = {
documentsRequestQuota: '{{count,number}}/分钟 知识库请求频率限制',
documentsRequestQuotaTooltip: '指每分钟内,一个空间在知识库中可执行的操作总数,包括数据集的创建、删除、更新,文档的上传、修改、归档,以及知识库查询等,用于评估知识库请求的性能。例如,Sandbox 用户在 1 分钟内连续执行 10 次命中测试,其工作区将在接下来的 1 分钟内无法继续执行以下操作:数据集的创建、删除、更新,文档的上传、修改等操作。',
documentProcessingPriority: '文档处理',
+ documentProcessingPriorityUpgrade: '以更快的速度、更高的精度处理更多的数据。',
priority: {
'standard': '标准',
'priority': '优先',
diff --git a/web/i18n/zh-Hans/dataset.ts b/web/i18n/zh-Hans/dataset.ts
index bedd114b73..064ceb3c03 100644
--- a/web/i18n/zh-Hans/dataset.ts
+++ b/web/i18n/zh-Hans/dataset.ts
@@ -168,6 +168,54 @@ const translation = {
preprocessDocument: '{{num}} 个预处理文档',
allKnowledge: '所有知识库',
allKnowledgeDescription: '选择以显示该工作区内所有知识库。只有工作区所有者才能管理所有知识库。',
+ embeddingModelNotAvailable: 'Embedding 模型不可用。',
+ metadata: {
+ metadata: '元数据',
+ addMetadata: '添加元数据',
+ chooseTime: '选择时间',
+ createMetadata: {
+ title: '新建元数据',
+ back: '返回',
+ type: '类型',
+ name: '名称',
+ namePlaceholder: '添加元数据名称',
+ },
+ checkName: {
+ empty: '元数据名称不能为空',
+ invalid: '元数据名称只能包含小写字母、数字和下划线,并且必须以小写字母开头',
+ },
+ batchEditMetadata: {
+ editMetadata: '编辑元数据',
+ editDocumentsNum: '编辑 {{num}} 个文档',
+ applyToAllSelectDocument: '应用于所有选定文档',
+ applyToAllSelectDocumentTip: '自动为所有选定文档创建上述编辑和新元数据,否则仅对具有元数据的文档应用编辑。',
+ multipleValue: '多个值',
+ },
+ selectMetadata: {
+ search: '搜索元数据',
+ newAction: '新建元数据',
+ manageAction: '管理',
+ },
+ datasetMetadata: {
+ description: '元数据是关于文档的数据,用于描述文档的属性。元数据可以帮助您更好地组织和管理文档。',
+ addMetaData: '添加元数据',
+ values: '{{num}} 个值',
+ disabled: '已禁用',
+ rename: '重命名',
+ name: '名称',
+ namePlaceholder: '元数据名称',
+ builtIn: '内置',
+ builtInDescription: '内置元数据是系统预定义的元数据,您可以在此处查看和管理内置元数据。',
+ deleteTitle: '确定删除',
+ deleteContent: '你确定要删除元数据 "{{name}}" 吗?',
+ },
+ documentMetadata: {
+ metadataToolTip: '元数据是关于文档的数据,用于描述文档的属性。元数据可以帮助您更好地组织和管理文档。',
+ startLabeling: '开始标注',
+ documentInformation: '文档信息',
+ technicalParameters: '技术参数',
+ },
+ },
}
export default translation
diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts
index 523a93b46d..c9bccebcea 100644
--- a/web/i18n/zh-Hans/workflow.ts
+++ b/web/i18n/zh-Hans/workflow.ts
@@ -430,6 +430,34 @@ const translation = {
url: '分段链接',
metadata: '其他元数据',
},
+ metadata: {
+ title: '元数据过滤',
+ tip: '元数据过滤是使用元数据属性(例如标签、类别或访问权限)来细化和控制系统内相关信息的检索过程。',
+ options: {
+ disabled: {
+ title: '禁用',
+ subTitle: '禁用元数据过滤',
+ },
+ automatic: {
+ title: '自动',
+ subTitle: '根据用户查询自动生成元数据过滤条件',
+ desc: '根据 Query Variable 自动生成元数据过滤条件',
+ },
+ manual: {
+ title: '手动',
+ subTitle: '手动添加元数据过滤条件',
+ },
+ },
+ panel: {
+ title: '元数据过滤条件',
+ conditions: '条件',
+ add: '添加条件',
+ search: '搜索元数据',
+ placeholder: '输入值',
+ datePlaceholder: '选择日期...',
+ select: '选择变量...',
+ },
+ },
},
http: {
inputVars: '输入变量',
@@ -518,6 +546,8 @@ const translation = {
'all of': '全部是',
'exists': '存在',
'not exists': '不存在',
+ 'before': '早于',
+ 'after': '晚于',
},
optionName: {
image: '图片',
diff --git a/web/models/datasets.ts b/web/models/datasets.ts
index 223947cc13..12ea4972ca 100644
--- a/web/models/datasets.ts
+++ b/web/models/datasets.ts
@@ -2,6 +2,8 @@ import type { DataSourceNotionPage, DataSourceProvider } from './common'
import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
import type { Tag } from '@/app/components/base/tag-management/constant'
import type { IndexingType } from '@/app/components/datasets/create/step-two'
+import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types'
+import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
export enum DataSourceType {
FILE = 'upload_file',
@@ -21,6 +23,13 @@ export enum ChunkingMode {
parentChild = 'hierarchical_model', // Parent-Child
}
+export type MetadataInDoc = { // element type of DataSet.doc_metadata; also the argument of HandleAddCondition
+ value: string
+ id: string
+ type: MetadataFilteringVariableType // string | number | time
+ name: string
+}
+
export type DataSet = {
id: string
name: string
@@ -56,6 +65,8 @@ export type DataSet = {
score_threshold: number
score_threshold_enabled: boolean
}
+ built_in_field_enabled: boolean
+ doc_metadata?: MetadataInDoc[]
}
export type ExternalAPIItem = {
@@ -314,6 +325,7 @@ export type SimpleDocumentDetail = InitialDocumentDetail & {
extension: string
}
}
+ doc_metadata?: MetadataItemWithValue[]
}
export type DocumentListResponse = {
diff --git a/web/models/debug.ts b/web/models/debug.ts
index 301248b234..940df9fee6 100644
--- a/web/models/debug.ts
+++ b/web/models/debug.ts
@@ -3,6 +3,11 @@ import type {
RerankingModeEnum,
} from '@/models/datasets'
import type { FileUpload } from '@/app/components/base/features/types'
+import type {
+ MetadataFilteringConditions,
+ MetadataFilteringModeEnum,
+} from '@/app/components/workflow/nodes/knowledge-retrieval/types'
+import type { ModelConfig as NodeModelConfig } from '@/app/components/workflow/types'
export type Inputs = Record
export enum PromptMode {
@@ -10,25 +15,25 @@ export enum PromptMode {
advanced = 'advanced',
}
-export interface PromptItem {
+export type PromptItem = {
role?: PromptRole
text: string
}
-export interface ChatPromptConfig {
+export type ChatPromptConfig = {
prompt: PromptItem[]
}
-export interface ConversationHistoriesRole {
+export type ConversationHistoriesRole = {
user_prefix: string
assistant_prefix: string
}
-export interface CompletionPromptConfig {
+export type CompletionPromptConfig = {
prompt: PromptItem
conversation_histories_role: ConversationHistoriesRole
}
-export interface BlockStatus {
+export type BlockStatus = {
context: boolean
history: boolean
query: boolean
@@ -40,7 +45,7 @@ export enum PromptRole {
assistant = 'assistant',
}
-export interface PromptVariable {
+export type PromptVariable = {
key: string
name: string
type: string // "string" | "number" | "select",
@@ -55,7 +60,7 @@ export interface PromptVariable {
icon_background?: string
}
-export interface CompletionParams {
+export type CompletionParams = {
max_tokens: number
temperature: number
top_p: number
@@ -66,12 +71,12 @@ export interface CompletionParams {
export type ModelId = 'gpt-3.5-turbo' | 'text-davinci-003'
-export interface PromptConfig {
+export type PromptConfig = {
prompt_template: string
prompt_variables: PromptVariable[]
}
-export interface MoreLikeThisConfig {
+export type MoreLikeThisConfig = {
enabled: boolean
}
@@ -79,7 +84,7 @@ export type SuggestedQuestionsAfterAnswerConfig = MoreLikeThisConfig
export type SpeechToTextConfig = MoreLikeThisConfig
-export interface TextToSpeechConfig {
+export type TextToSpeechConfig = {
enabled: boolean
voice?: string
language?: string
@@ -88,7 +93,7 @@ export interface TextToSpeechConfig {
export type CitationConfig = MoreLikeThisConfig
-export interface AnnotationReplyConfig {
+export type AnnotationReplyConfig = {
id: string
enabled: boolean
score_threshold: number
@@ -98,7 +103,7 @@ export interface AnnotationReplyConfig {
}
}
-export interface ModerationContentConfig {
+export type ModerationContentConfig = {
enabled: boolean
preset_response?: string
}
@@ -113,14 +118,14 @@ export type ModerationConfig = MoreLikeThisConfig & {
}
export type RetrieverResourceConfig = MoreLikeThisConfig
-export interface AgentConfig {
+export type AgentConfig = {
enabled: boolean
strategy: AgentStrategy
max_iteration: number
tools: ToolItem[]
}
// frontend use. Not the same as backend
-export interface ModelConfig {
+export type ModelConfig = {
provider: string // LLM Provider: for example "OPENAI"
model_id: string
mode: ModelModeType
@@ -138,12 +143,12 @@ export interface ModelConfig {
dataSets: any[]
agentConfig: AgentConfig
}
-export interface DatasetConfigItem {
+export type DatasetConfigItem = {
enable: boolean
value: number
}
-export interface DatasetConfigs {
+export type DatasetConfigs = {
retrieval_model: RETRIEVE_TYPE
reranking_model: {
reranking_provider_name: string
@@ -170,41 +175,44 @@ export interface DatasetConfigs {
}
}
reranking_enable?: boolean
+ metadata_filtering_mode?: MetadataFilteringModeEnum
+ metadata_filtering_conditions?: MetadataFilteringConditions
+ metadata_model_config?: NodeModelConfig
}
-export interface DebugRequestBody {
+export type DebugRequestBody = {
inputs: Inputs
query: string
completion_params: CompletionParams
model_config: ModelConfig
}
-export interface DebugResponse {
+export type DebugResponse = {
id: string
answer: string
created_at: string
}
-export interface DebugResponseStream {
+export type DebugResponseStream = {
id: string
data: string
created_at: string
}
-export interface FeedBackRequestBody {
+export type FeedBackRequestBody = {
message_id: string
rating: 'like' | 'dislike'
content?: string
from_source: 'api' | 'log'
}
-export interface FeedBackResponse {
+export type FeedBackResponse = {
message_id: string
rating: 'like' | 'dislike'
}
// Log session list
-export interface LogSessionListQuery {
+export type LogSessionListQuery = {
keyword?: string
start?: string // format datetime(YYYY-mm-dd HH:ii)
end?: string // format datetime(YYYY-mm-dd HH:ii)
@@ -212,7 +220,7 @@ export interface LogSessionListQuery {
limit: number // default 20. 1-100
}
-export interface LogSessionListResponse {
+export type LogSessionListResponse = {
data: {
id: string
conversation_id: string
@@ -226,7 +234,7 @@ export interface LogSessionListResponse {
}
// log session detail and debug
-export interface LogSessionDetailResponse {
+export type LogSessionDetailResponse = {
id: string
conversation_id: string
model_provider: string
@@ -240,7 +248,7 @@ export interface LogSessionDetailResponse {
from_source: 'api' | 'log'
}
-export interface SavedMessage {
+export type SavedMessage = {
id: string
answer: string
}
diff --git a/web/service/knowledge/use-dateset.ts b/web/service/knowledge/use-dateset.ts
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/web/service/knowledge/use-document.ts b/web/service/knowledge/use-document.ts
index 5bced9286e..6dabe7d872 100644
--- a/web/service/knowledge/use-document.ts
+++ b/web/service/knowledge/use-document.ts
@@ -11,7 +11,7 @@ import type { CommonResponse } from '@/models/common'
const NAME_SPACE = 'knowledge/document'
-const useDocumentListKey = [NAME_SPACE, 'documentList']
+export const useDocumentListKey = [NAME_SPACE, 'documentList']
export const useDocumentList = (payload: {
datasetId: string
query: {
diff --git a/web/service/knowledge/use-metadata.ts b/web/service/knowledge/use-metadata.ts
new file mode 100644
index 0000000000..5e9186f539
--- /dev/null
+++ b/web/service/knowledge/use-metadata.ts
@@ -0,0 +1,146 @@
+import type { BuiltInMetadataItem, MetadataBatchEditToServer, MetadataItemWithValueLength } from '@/app/components/datasets/metadata/types'
+import { del, get, patch, post } from '../base'
+import { useDocumentListKey, useInvalidDocumentList } from './use-document'
+import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
+import { useInvalid } from '../use-base'
+import type { DocumentDetailResponse } from '@/models/datasets'
+
+const NAME_SPACE = 'dataset-metadata'
+
+/** Queries `/datasets/{datasetId}/metadata`; the result is typed with `doc_metadata` and `built_in_field_enabled`. */
+export const useDatasetMetaData = (datasetId: string) => {
+  type DatasetMetaData = { doc_metadata: MetadataItemWithValueLength[], built_in_field_enabled: boolean }
+  return useQuery<DatasetMetaData>({
+    queryKey: [NAME_SPACE, 'dataset', datasetId],
+    queryFn: () => get<DatasetMetaData>(`/datasets/${datasetId}/metadata`),
+  })
+}
+
+/** Passes the dataset metadata query key `[NAME_SPACE, 'dataset', datasetId]` to `useInvalid` and returns its result. */
+export const useInvalidDatasetMetaData = (datasetId: string) =>
+  useInvalid([NAME_SPACE, 'dataset', datasetId])
+
+/**
+ * Mutation that POSTs the payload to `/datasets/{datasetId}/metadata` and resolves to true after awaiting the dataset metadata invalidation.
+ */
+export const useCreateMetaData = (datasetId: string) => {
+  const invalidDatasetMetaData = useInvalidDatasetMetaData(datasetId)
+  const createMetaData = async (payload: BuiltInMetadataItem) => {
+    await post(`/datasets/${datasetId}/metadata`, { body: payload })
+    await invalidDatasetMetaData()
+    return true
+  }
+  return useMutation({ mutationFn: createMetaData })
+}
+/** Returns a callback that invalidates every query whose key starts with `[NAME_SPACE, 'document', datasetId]`. */
+export const useInvalidAllDocumentMetaData = (datasetId: string) => {
+  const queryClient = useQueryClient()
+  // The promise from invalidateQueries is returned so that callers awaiting the callback really wait for it.
+  return () => queryClient.invalidateQueries({
+    queryKey: [NAME_SPACE, 'document', datasetId],
+    exact: false, // invalidate all document metadata: [NAME_SPACE, 'document', datasetId, documentId]
+  })
+}
+
+/**
+ * Builds an async callback that refreshes the metadata caches of one dataset:
+ * the dataset metadata query, the document list, and the per-document metadata queries.
+ */
+const useInvalidAllMetaData = (datasetId: string) => {
+  const refreshDatasetMetaData = useInvalidDatasetMetaData(datasetId)
+  const refreshDocumentList = useInvalidDocumentList(datasetId)
+  const refreshDocumentMetaData = useInvalidAllDocumentMetaData(datasetId)
+  return async () => {
+    await refreshDatasetMetaData()
+    refreshDocumentList() // the document list refresh is started but not awaited
+    await refreshDocumentMetaData()
+  }
+}
+
+/**
+ * Mutation that PATCHes `{ name }` to `/datasets/{datasetId}/metadata/{id}`
+ * and then awaits the invalidation of the dataset's metadata caches.
+ */
+export const useRenameMeta = (datasetId: string) => {
+  const invalidateAllMetaData = useInvalidAllMetaData(datasetId)
+  const renameMeta = async ({ id, name }: MetadataItemWithValueLength) => {
+    await patch(`/datasets/${datasetId}/metadata/${id}`, { body: { name } })
+    await invalidateAllMetaData()
+  }
+
+  return useMutation({ mutationFn: renameMeta })
+}
+
+/** Mutation that DELETEs `/datasets/{datasetId}/metadata/{metaDataId}`, then awaits the invalidation of the metadata caches. */
+export const useDeleteMetaData = (datasetId: string) => {
+  const invalidateAllMetaData = useInvalidAllMetaData(datasetId)
+  const deleteMetaData = async (metaDataId: string) => {
+    await del(`/datasets/${datasetId}/metadata/${metaDataId}`)
+    await invalidateAllMetaData()
+  }
+
+  return useMutation({ mutationFn: deleteMetaData })
+}
+
+/** Queries `/datasets/metadata/built-in`; the result is typed as `{ fields: BuiltInMetadataItem[] }`. */
+export const useBuiltInMetaDataFields = () => {
+  const queryKey = [NAME_SPACE, 'built-in']
+  return useQuery<{ fields: BuiltInMetadataItem[] }>({
+    queryKey,
+    queryFn: () => get('/datasets/metadata/built-in'),
+  })
+}
+
+/** Queries `/datasets/{datasetId}/documents/{documentId}` with `metadata: 'only'`, typed as DocumentDetailResponse. */
+export const useDocumentMetaData = ({ datasetId, documentId }: { datasetId: string, documentId: string }) => {
+  // DocumentDetailResponse is imported at the top of this file; the explicit type arguments type the query data.
+  return useQuery<DocumentDetailResponse>({
+    queryKey: [NAME_SPACE, 'document', datasetId, documentId],
+    queryFn: () => get<DocumentDetailResponse>(`/datasets/${datasetId}/documents/${documentId}`, { params: { metadata: 'only' } }),
+  })
+}
+
+/**
+ * Mutation that POSTs a batch of per-document metadata edits and then invalidates
+ * the dataset metadata query, the document list query and each edited document's metadata query.
+ */
+export const useBatchUpdateDocMetadata = () => {
+  const queryClient = useQueryClient()
+  return useMutation({
+    mutationFn: async (payload: {
+      dataset_id: string
+      metadata_list: MetadataBatchEditToServer
+    }) => {
+      const { dataset_id: datasetId, metadata_list: metadataList } = payload
+      await post(`/datasets/${datasetId}/documents/metadata`, {
+        body: {
+          operation_data: metadataList,
+        },
+      })
+
+      // meta data in dataset (a single invalidation of this key is enough)
+      await queryClient.invalidateQueries({
+        queryKey: [NAME_SPACE, 'dataset', datasetId],
+      })
+      // meta data in document list
+      await queryClient.invalidateQueries({
+        queryKey: [...useDocumentListKey, datasetId],
+      })
+
+      // meta data in single document
+      await Promise.all(metadataList.map(({ document_id: documentId }) => queryClient.invalidateQueries({
+        queryKey: [NAME_SPACE, 'document', datasetId, documentId],
+      })))
+    },
+  })
+}
+
+/** Mutation that POSTs to the built-in metadata `enable` or `disable` endpoint, then awaits the dataset metadata invalidation. */
+export const useUpdateBuiltInStatus = (datasetId: string) => {
+  const invalidDatasetMetaData = useInvalidDatasetMetaData(datasetId)
+  const updateBuiltInStatus = async (enabled: boolean) => {
+    await post(`/datasets/${datasetId}/metadata/built-in/${enabled ? 'enable' : 'disable'}`)
+    await invalidDatasetMetaData() // awaited, as in useCreateMetaData, so the mutation settles after the refresh
+  }
+  return useMutation({ mutationFn: updateBuiltInStatus })
+}