From fef44a71c5bc915efea2bbe4b583a11ea6340f2a Mon Sep 17 00:00:00 2001 From: balibabu Date: Fri, 25 Apr 2025 17:31:28 +0800 Subject: [PATCH] Feat: Save the configuration information of the knowledge base document #3221 (#7317) ### What problem does this PR solve? Feat: Save the configuration information of the knowledge base document #3221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- .../dynamic-page-range.tsx | 88 ++++++++++++++ .../components/chunk-method-dialog/index.tsx | 108 ++++++++++++++---- .../use-default-parser-values.ts | 60 ++++++++++ web/src/components/delimiter-form-field.tsx | 2 +- .../components/file-upload-dialog/index.tsx | 1 + .../raptor-form-fields.tsx | 8 +- .../dataset/dataset/parsing-status-cell.tsx | 48 +++----- .../dataset/use-change-document-parser.ts | 8 +- .../dataset/use-dataset-table-columns.tsx | 2 +- 9 files changed, 265 insertions(+), 60 deletions(-) create mode 100644 web/src/components/chunk-method-dialog/dynamic-page-range.tsx create mode 100644 web/src/components/chunk-method-dialog/use-default-parser-values.ts diff --git a/web/src/components/chunk-method-dialog/dynamic-page-range.tsx b/web/src/components/chunk-method-dialog/dynamic-page-range.tsx new file mode 100644 index 000000000..e833e13d9 --- /dev/null +++ b/web/src/components/chunk-method-dialog/dynamic-page-range.tsx @@ -0,0 +1,88 @@ +'use client'; + +import { Button } from '@/components/ui/button'; +import { + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from '@/components/ui/form'; +import { Input } from '@/components/ui/input'; +import { Plus, Trash2 } from 'lucide-react'; +import { useFieldArray, useFormContext } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; + +/** Form section bound to the array field parser_config.pages of the surrounding react-hook-form context. It reads the form through useFormContext, so it presumably has to be rendered inside a form provider (confirm against the caller). NOTE(review): the JSX element tags of this component are not visible in this view, so which inputs and buttons it renders cannot be confirmed from here. */ export function DynamicPageRange() { + const { t } = useTranslation(); + const form = useFormContext(); + + /* Field-array helpers for the list registered under parser_config.pages. */ const { fields, remove, append } = useFieldArray({ + name: 'parser_config.pages',
control: form.control, + }); + + return ( +
+ + {t('knowledgeDetails.pageRanges')} + + {fields.map((field, index) => { + /* Form path of the from value of the entry at this index. */ const typeField = `parser_config.pages.${index}.from`; + return ( +
+ ( + + + + + + + + )} + /> + ( + + + + + + + + )} + /> + remove(index)} + /> +
+ ); + })} + +
+ ); +} diff --git a/web/src/components/chunk-method-dialog/index.tsx b/web/src/components/chunk-method-dialog/index.tsx index 764ea4989..fa2fabd10 100644 --- a/web/src/components/chunk-method-dialog/index.tsx +++ b/web/src/components/chunk-method-dialog/index.tsx @@ -15,15 +15,17 @@ import { FormMessage, } from '@/components/ui/form'; import { DocumentParserType } from '@/constants/knowledge'; -import { useTranslate } from '@/hooks/common-hooks'; import { useFetchKnowledgeBaseConfiguration } from '@/hooks/use-knowledge-request'; import { IModalProps } from '@/interfaces/common'; import { IParserConfig } from '@/interfaces/database/document'; import { IChangeParserConfigRequestBody } from '@/interfaces/request/document'; import { zodResolver } from '@hookform/resolvers/zod'; +import get from 'lodash/get'; +import omit from 'lodash/omit'; import {} from 'module'; -import { useMemo } from 'react'; +import { useEffect, useMemo } from 'react'; import { useForm, useWatch } from 'react-hook-form'; +import { useTranslation } from 'react-i18next'; import { z } from 'zod'; import { AutoKeywordsFormField, @@ -33,10 +35,7 @@ import { DatasetConfigurationContainer } from '../dataset-configuration-containe import { DelimiterFormField } from '../delimiter-form-field'; import { EntityTypesFormField } from '../entity-types-form-field'; import { ExcelToHtmlFormField } from '../excel-to-html-form-field'; -import { - DocumentType, - LayoutRecognizeFormField, -} from '../layout-recognize-form-field'; +import { LayoutRecognizeFormField } from '../layout-recognize-form-field'; import { MaxTokenNumberFormField } from '../max-token-number-from-field'; import { UseGraphRagFormField, @@ -47,7 +46,12 @@ import RaptorFormFields, { } from '../parse-configuration/raptor-form-fields'; import { Input } from '../ui/input'; import { RAGFlowSelect } from '../ui/select'; +import { DynamicPageRange } from './dynamic-page-range'; import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
+import { + useDefaultParserValues, + useFillDefaultValueOnMount, +} from './use-default-parser-values'; const FormId = 'ChunkMethodDialogForm'; @@ -78,8 +82,10 @@ export function ChunkMethodDialog({ parserId, documentId, documentExtension, + visible, + parserConfig, }: IProps) { - const { t } = useTranslate('knowledgeDetails'); + const { t } = useTranslation(); const { parserList } = useFetchParserListOnMount( documentId, @@ -94,6 +100,10 @@ export function ChunkMethodDialog({ return knowledgeDetails.parser_config?.graphrag?.use_graphrag; }, [knowledgeDetails.parser_config?.graphrag?.use_graphrag]); + /* Default parser_config values; used below as the initial defaultValues of the form. */ const defaultParserValues = useDefaultParserValues(); + + /* Callback that overlays a given parser config onto those defaults; used by the reset effect further down. */ const fillDefaultParserValue = useFillDefaultValueOnMount(); + const FormSchema = z.object({ parser_id: z .string() @@ -104,16 +114,34 @@ export function ChunkMethodDialog({ parser_config: z.object({ task_page_size: z.coerce.number(), layout_recognize: z.string(), + chunk_token_num: z.coerce.number(), + delimiter: z.string(), + auto_keywords: z.coerce.number(), + auto_questions: z.coerce.number(), + html4excel: z.boolean(), + raptor: z.object({ + use_raptor: z.boolean().optional(), + prompt: z.string(), + max_token: z.coerce.number(), + threshold: z.coerce.number(), + max_cluster: z.coerce.number(), + random_seed: z.coerce.number(), + }), + graphrag: z.object({ + use_graphrag: z.boolean(), + }), + entity_types: z.array(z.string()), + /* In the form, each page range is an object with from and to; onSubmit turns these into pairs. */ pages: z.array( + z.object({ from: z.coerce.number(), to: z.coerce.number() }), + ), }), }); const form = useForm>({ resolver: zodResolver(FormSchema), defaultValues: { parser_id: parserId, - parser_config: { - task_page_size: 12, - layout_recognize: DocumentType.DeepDOC, - }, + + parser_config: defaultParserValues, }, }); @@ -155,22 +183,59 @@ export function ChunkMethodDialog({ async function onSubmit(data: z.infer) { console.log('🚀 ~ onSubmit ~ data:', data); - // const ret = await onOk?.(); - // if (ret) { - // hideModal?.(); - // } + const nextData = { + ...data, +
parser_config: { + ...data.parser_config, + /* Each form entry with from and to becomes a two-element array holding from then to; an empty array is sent when pages is missing. NOTE(review): the mapper parameter is typed any. */ pages: data.parser_config?.pages?.map((x: any) => [x.from, x.to]) ?? [], + }, + }; + /* NOTE(review): debug logging left in the submit path. */ console.log('🚀 ~ onSubmit ~ nextData:', nextData); + /* The converted payload is handed to onOk; the modal is hidden only when onOk resolves to a truthy value. */ const ret = await onOk?.(nextData); + if (ret) { + hideModal?.(); + } } + /* Whenever the dialog is visible, reset the form from the parserConfig prop: stored page pairs become objects with from and to, a single range from 1 to 1024 is used when no pages are stored, and graphrag.use_graphrag falls back to useGraphRag when parserConfig does not provide it. The merged object is passed through fillDefaultParserValue before the reset. */ useEffect(() => { + if (visible) { + const pages = + parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? []; + form.reset({ + parser_id: parserId, + parser_config: fillDefaultParserValue({ + pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }], + ...omit(parserConfig, 'pages'), + graphrag: { + use_graphrag: get( + parserConfig, + 'graphrag.use_graphrag', + useGraphRag, + ), + }, + }), + }); + } + }, [ + fillDefaultParserValue, + form, + knowledgeDetails.parser_config, + parserConfig, + parserId, + useGraphRag, + visible, + ]); + return ( - {t('chunkMethod')} + {t('knowledgeDetails.chunkMethod')}
( - {t('name')} + {t('knowledgeDetails.chunkMethod')} )} /> + {showPages && } {showPages && layoutRecognize && ( ( - - {t('taskPageSize')} + + {t('knowledgeDetails.taskPageSize')} diff --git a/web/src/components/chunk-method-dialog/use-default-parser-values.ts b/web/src/components/chunk-method-dialog/use-default-parser-values.ts new file mode 100644 index 000000000..6b52b50fd --- /dev/null +++ b/web/src/components/chunk-method-dialog/use-default-parser-values.ts @@ -0,0 +1,60 @@ +import { IParserConfig } from '@/interfaces/database/document'; +import { useCallback, useMemo } from 'react'; +import { useTranslation } from 'react-i18next'; +import { DocumentType } from '../layout-recognize-form-field'; + +/** Hook returning a memoized object of default parser_config values. The object is rebuilt only when the translation function t changes, because the default raptor prompt is a translated string. */ export function useDefaultParserValues() { + const { t } = useTranslation(); + + const defaultParserValues = useMemo(() => { + const defaultParserValues = { + task_page_size: 12, + layout_recognize: DocumentType.DeepDOC, + chunk_token_num: 512, + delimiter: '\n', + auto_keywords: 0, + auto_questions: 0, + html4excel: false, + raptor: { + use_raptor: false, + prompt: t('knowledgeConfiguration.promptText'), + max_token: 256, + threshold: 0.1, + max_cluster: 64, + random_seed: 0, + }, + graphrag: { + use_graphrag: false, + }, + entity_types: [], + pages: [], + }; + + return defaultParserValues; + }, [t]); + + return defaultParserValues; +} + +/** Hook returning a callback that builds a config containing exactly the top-level keys of the defaults. For each default key, the value from parserConfig is taken when that key is present in it, otherwise the default value is used. Keys of parserConfig that have no default are dropped, and nested objects are not merged. NOTE(review): the in check throws when parserConfig is null or undefined, and a key that is present with an undefined value overrides the default. */ export function useFillDefaultValueOnMount() { + const defaultParserValues = useDefaultParserValues(); + + const fillDefaultValue = useCallback( + (parserConfig: IParserConfig) => { + return Object.entries(defaultParserValues).reduce>( + (pre, [key, value]) => { + /* Presence of the key decides, not whether its value is defined. */ if (key in parserConfig) { + pre[key] = parserConfig[key as keyof IParserConfig]; + } else { + pre[key] = value; + } + return pre; + }, + {}, + ); + }, + [defaultParserValues], + ); + + return fillDefaultValue; +} diff --git a/web/src/components/delimiter-form-field.tsx b/web/src/components/delimiter-form-field.tsx index 8b5c8d635..479aa917c 100644 ---
a/web/src/components/delimiter-form-field.tsx +++ b/web/src/components/delimiter-form-field.tsx @@ -48,7 +48,7 @@ export function DelimiterFormField() { {t('knowledgeDetails.delimiter')} - + diff --git a/web/src/components/file-upload-dialog/index.tsx b/web/src/components/file-upload-dialog/index.tsx index 02e51ef5e..81c854c6b 100644 --- a/web/src/components/file-upload-dialog/index.tsx +++ b/web/src/components/file-upload-dialog/index.tsx @@ -30,6 +30,7 @@ export function UploaderTabs({ setFiles }: UploaderTabsProps) { maxFileCount={8} maxSize={8 * 1024 * 1024} onValueChange={setFiles} + accept={{ '*': [] }} /> {t('common.comingSoon')} diff --git a/web/src/components/parse-configuration/raptor-form-fields.tsx b/web/src/components/parse-configuration/raptor-form-fields.tsx index 1c33d17e8..835108ae7 100644 --- a/web/src/components/parse-configuration/raptor-form-fields.tsx +++ b/web/src/components/parse-configuration/raptor-form-fields.tsx @@ -83,7 +83,7 @@ const RaptorFormFields = () => { render={({ field }) => ( {t('prompt')} - +