Feat: Modify the parsing method string to an enumeration type. #5467 (#5468)

### What problem does this PR solve?

Feat: Replace the string literals used for document parsing methods with the `DocumentParserType` enum. #5467

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2025-02-28 11:13:56 +08:00 committed by GitHub
parent 5fdfb8d465
commit b2a5482d2c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 135 additions and 84 deletions

View File

@ -1,3 +1,4 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks'; import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { useSelectParserList } from '@/hooks/user-setting-hooks'; import { useSelectParserList } from '@/hooks/user-setting-hooks';
import { FormInstance } from 'antd'; import { FormInstance } from 'antd';
@ -7,69 +8,82 @@ const ParserListMap = new Map([
[ [
['pdf'], ['pdf'],
[ [
'naive', DocumentParserType.Naive,
'resume', DocumentParserType.Resume,
'manual', DocumentParserType.Manual,
'paper', DocumentParserType.Paper,
'book', DocumentParserType.Book,
'laws', DocumentParserType.Laws,
'presentation', DocumentParserType.Presentation,
'one', DocumentParserType.One,
'qa', DocumentParserType.Qa,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
], ],
], ],
[ [
['doc', 'docx'], ['doc', 'docx'],
[ [
'naive', DocumentParserType.Naive,
'resume', DocumentParserType.Resume,
'book', DocumentParserType.Book,
'laws', DocumentParserType.Laws,
'one', DocumentParserType.One,
'qa', DocumentParserType.Qa,
'manual', DocumentParserType.Manual,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
], ],
], ],
[ [
['xlsx', 'xls'], ['xlsx', 'xls'],
['naive', 'qa', 'table', 'one', 'knowledge_graph'], [
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
], ],
[['ppt', 'pptx'], ['presentation']], ],
[['ppt', 'pptx'], [DocumentParserType.Presentation]],
[ [
['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'], ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'],
['picture'], [DocumentParserType.Picture],
], ],
[ [
['txt'], ['txt'],
[ [
'naive', DocumentParserType.Naive,
'resume', DocumentParserType.Resume,
'book', DocumentParserType.Book,
'laws', DocumentParserType.Laws,
'one', DocumentParserType.One,
'qa', DocumentParserType.Qa,
'table', DocumentParserType.Table,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
], ],
], ],
[ [
['csv'], ['csv'],
[ [
'naive', DocumentParserType.Naive,
'resume', DocumentParserType.Resume,
'book', DocumentParserType.Book,
'laws', DocumentParserType.Laws,
'one', DocumentParserType.One,
'qa', DocumentParserType.Qa,
'table', DocumentParserType.Table,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
], ],
], ],
[['md'], ['naive', 'qa', 'knowledge_graph']], [
[['json'], ['naive', 'knowledge_graph']], ['md'],
[['eml'], ['email']], [
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
],
],
[['json'], [DocumentParserType.Naive, DocumentParserType.KnowledgeGraph]],
[['eml'], [DocumentParserType.Email]],
]); ]);
const getParserList = ( const getParserList = (
@ -84,11 +98,11 @@ const getParserList = (
export const useFetchParserListOnMount = ( export const useFetchParserListOnMount = (
documentId: string, documentId: string,
parserId: string, parserId: DocumentParserType,
documentExtension: string, documentExtension: string,
form: FormInstance, form: FormInstance,
) => { ) => {
const [selectedTag, setSelectedTag] = useState(''); const [selectedTag, setSelectedTag] = useState<DocumentParserType>();
const parserList = useSelectParserList(); const parserList = useSelectParserList();
const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form); const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form);
@ -102,7 +116,15 @@ export const useFetchParserListOnMount = (
} }
return getParserList( return getParserList(
['naive', 'resume', 'book', 'laws', 'one', 'qa', 'table'], [
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
],
parserList, parserList,
); );
}, [parserList, documentExtension]); }, [parserList, documentExtension]);
@ -113,18 +135,27 @@ export const useFetchParserListOnMount = (
const handleChange = (tag: string) => { const handleChange = (tag: string) => {
handleChunkMethodSelectChange(tag); handleChunkMethodSelectChange(tag);
setSelectedTag(tag); setSelectedTag(tag as DocumentParserType);
}; };
return { parserList: nextParserList, handleChange, selectedTag }; return { parserList: nextParserList, handleChange, selectedTag };
}; };
const hideAutoKeywords = ['qa', 'table', 'resume', 'knowledge_graph', 'tag']; const hideAutoKeywords = [
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];
export const useShowAutoKeywords = () => { export const useShowAutoKeywords = () => {
const showAutoKeywords = useCallback((selectedTag: string) => { const showAutoKeywords = useCallback(
(selectedTag: DocumentParserType | undefined) => {
return hideAutoKeywords.every((x) => selectedTag !== x); return hideAutoKeywords.every((x) => selectedTag !== x);
}, []); },
[],
);
return showAutoKeywords; return showAutoKeywords;
}; };

View File

@ -19,6 +19,7 @@ import omit from 'lodash/omit';
import React, { useEffect, useMemo } from 'react'; import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks'; import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { IParserConfig } from '@/interfaces/database/document'; import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document'; import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
@ -38,23 +39,23 @@ import styles from './index.less';
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> { interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
loading: boolean; loading: boolean;
onOk: ( onOk: (
parserId: string, parserId: DocumentParserType | undefined,
parserConfig: IChangeParserConfigRequestBody, parserConfig: IChangeParserConfigRequestBody,
) => void; ) => void;
showModal?(): void; showModal?(): void;
parserId: string; parserId: DocumentParserType;
parserConfig: IParserConfig; parserConfig: IParserConfig;
documentExtension: string; documentExtension: string;
documentId: string; documentId: string;
} }
const hidePagesChunkMethods = [ const hidePagesChunkMethods = [
'qa', DocumentParserType.Qa,
'table', DocumentParserType.Table,
'picture', DocumentParserType.Picture,
'resume', DocumentParserType.Resume,
'one', DocumentParserType.One,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
]; ];
const ChunkMethodModal: React.FC<IProps> = ({ const ChunkMethodModal: React.FC<IProps> = ({
@ -95,22 +96,23 @@ const ChunkMethodModal: React.FC<IProps> = ({
return ( return (
isPdf && isPdf &&
hidePagesChunkMethods hidePagesChunkMethods
.filter((x) => x !== 'one') .filter((x) => x !== DocumentParserType.One)
.every((x) => x !== selectedTag) .every((x) => x !== selectedTag)
); );
}, [selectedTag, isPdf]); }, [selectedTag, isPdf]);
const showMaxTokenNumber = const showMaxTokenNumber =
selectedTag === 'naive' || selectedTag === 'knowledge_graph'; selectedTag === DocumentParserType.Naive ||
selectedTag === DocumentParserType.KnowledgeGraph;
const hideDivider = [showPages, showOne, showMaxTokenNumber].every( const hideDivider = [showPages, showOne, showMaxTokenNumber].every(
(x) => x === false, (x) => x === false,
); );
const showEntityTypes = selectedTag === 'knowledge_graph'; const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;
const showExcelToHtml = const showExcelToHtml =
selectedTag === 'naive' && documentExtension === 'xlsx'; selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';
const showAutoKeywords = useShowAutoKeywords(); const showAutoKeywords = useShowAutoKeywords();
@ -284,7 +286,11 @@ const ChunkMethodModal: React.FC<IProps> = ({
{showMaxTokenNumber && ( {showMaxTokenNumber && (
<> <>
<MaxTokenNumber <MaxTokenNumber
max={selectedTag === 'knowledge_graph' ? 8192 * 2 : 2048} max={
selectedTag === DocumentParserType.KnowledgeGraph
? 8192 * 2
: 2048
}
></MaxTokenNumber> ></MaxTokenNumber>
<Delimiter></Delimiter> <Delimiter></Delimiter>
</> </>

View File

@ -1,12 +1,17 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { Form, Select, Switch } from 'antd'; import { Form, Select, Switch } from 'antd';
import { upperFirst } from 'lodash'; import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react'; import { useCallback, useMemo } from 'react';
import EntityTypesItem from '../entity-types-item'; import EntityTypesItem from '../entity-types-item';
const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag']; const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];
export const showTagItems = (parserId: string) => { export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId); return !excludedTagParseMethods.includes(parserId);
}; };
@ -16,16 +21,16 @@ const enum MethodValue {
} }
export const excludedParseMethods = [ export const excludedParseMethods = [
'table', DocumentParserType.Table,
'resume', DocumentParserType.Resume,
'picture', DocumentParserType.Picture,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
'qa', DocumentParserType.Qa,
'tag', DocumentParserType.Tag,
]; ];
export const showGraphRagItems = (parserId: string) => { export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
return !excludedParseMethods.includes(parserId); return !excludedParseMethods.some((x) => x === parserId);
}; };
// The three types "table", "resume" and "one" do not display this configuration. // The three types "table", "resume" and "one" do not display this configuration.

View File

@ -1,3 +1,4 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { PlusOutlined } from '@ant-design/icons'; import { PlusOutlined } from '@ant-design/icons';
import { import {
@ -13,22 +14,28 @@ import {
import random from 'lodash/random'; import random from 'lodash/random';
export const excludedParseMethods = [ export const excludedParseMethods = [
'table', DocumentParserType.Table,
'resume', DocumentParserType.Resume,
'one', DocumentParserType.One,
'picture', DocumentParserType.Picture,
'knowledge_graph', DocumentParserType.KnowledgeGraph,
'qa', DocumentParserType.Qa,
'tag', DocumentParserType.Tag,
]; ];
export const showRaptorParseConfiguration = (parserId: string) => { export const showRaptorParseConfiguration = (
return !excludedParseMethods.includes(parserId); parserId: DocumentParserType | undefined,
) => {
return !excludedParseMethods.some((x) => x === parserId);
}; };
export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag']; export const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];
export const showTagItems = (parserId: string) => { export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId); return !excludedTagParseMethods.includes(parserId);
}; };

View File

@ -79,4 +79,5 @@ export enum DocumentParserType {
Audio = 'audio', Audio = 'audio',
Email = 'email', Email = 'email',
Tag = 'tag', Tag = 'tag',
KnowledgeGraph = 'knowledge_graph',
} }

View File

@ -16,6 +16,7 @@ import ParseConfiguration, {
import GraphRagItems, { import GraphRagItems, {
showGraphRagItems, showGraphRagItems,
} from '@/components/parse-configuration/graph-rag-items'; } from '@/components/parse-configuration/graph-rag-items';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks'; import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { normFile } from '@/utils/file-util'; import { normFile } from '@/utils/file-util';
@ -127,7 +128,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
return ( return (
<> <>
{parserId === 'knowledge_graph' && ( {parserId === DocumentParserType.KnowledgeGraph && (
<> <>
<EntityTypesItem></EntityTypesItem> <EntityTypesItem></EntityTypesItem>
<MaxTokenNumber max={8192 * 2}></MaxTokenNumber> <MaxTokenNumber max={8192 * 2}></MaxTokenNumber>
@ -140,7 +141,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
<AutoQuestionsItem></AutoQuestionsItem> <AutoQuestionsItem></AutoQuestionsItem>
</> </>
)} )}
{parserId === 'naive' && ( {parserId === DocumentParserType.Naive && (
<> <>
<MaxTokenNumber></MaxTokenNumber> <MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter> <Delimiter></Delimiter>