Feat: Modify the parsing method string to an enumeration type. #5467 (#5468)

### What problem does this PR solve?

Feat: Modify the parsing method string to an enumeration type. #5467

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2025-02-28 11:13:56 +08:00 committed by GitHub
parent 5fdfb8d465
commit b2a5482d2c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 135 additions and 84 deletions

View File

@ -1,3 +1,4 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { useSelectParserList } from '@/hooks/user-setting-hooks';
import { FormInstance } from 'antd';
@ -7,69 +8,82 @@ const ParserListMap = new Map([
[
['pdf'],
[
'naive',
'resume',
'manual',
'paper',
'book',
'laws',
'presentation',
'one',
'qa',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Manual,
DocumentParserType.Paper,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.Presentation,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
],
],
[
['doc', 'docx'],
[
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'manual',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Manual,
DocumentParserType.KnowledgeGraph,
],
],
[
['xlsx', 'xls'],
['naive', 'qa', 'table', 'one', 'knowledge_graph'],
[
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
],
],
[['ppt', 'pptx'], ['presentation']],
[['ppt', 'pptx'], [DocumentParserType.Presentation]],
[
['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tif', 'tiff', 'webp', 'svg', 'ico'],
['picture'],
[DocumentParserType.Picture],
],
[
['txt'],
[
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'table',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
],
],
[
['csv'],
[
'naive',
'resume',
'book',
'laws',
'one',
'qa',
'table',
'knowledge_graph',
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
],
],
[['md'], ['naive', 'qa', 'knowledge_graph']],
[['json'], ['naive', 'knowledge_graph']],
[['eml'], ['email']],
[
['md'],
[
DocumentParserType.Naive,
DocumentParserType.Qa,
DocumentParserType.KnowledgeGraph,
],
],
[['json'], [DocumentParserType.Naive, DocumentParserType.KnowledgeGraph]],
[['eml'], [DocumentParserType.Email]],
]);
const getParserList = (
@ -84,11 +98,11 @@ const getParserList = (
export const useFetchParserListOnMount = (
documentId: string,
parserId: string,
parserId: DocumentParserType,
documentExtension: string,
form: FormInstance,
) => {
const [selectedTag, setSelectedTag] = useState('');
const [selectedTag, setSelectedTag] = useState<DocumentParserType>();
const parserList = useSelectParserList();
const handleChunkMethodSelectChange = useHandleChunkMethodSelectChange(form);
@ -102,7 +116,15 @@ export const useFetchParserListOnMount = (
}
return getParserList(
['naive', 'resume', 'book', 'laws', 'one', 'qa', 'table'],
[
DocumentParserType.Naive,
DocumentParserType.Resume,
DocumentParserType.Book,
DocumentParserType.Laws,
DocumentParserType.One,
DocumentParserType.Qa,
DocumentParserType.Table,
],
parserList,
);
}, [parserList, documentExtension]);
@ -113,18 +135,27 @@ export const useFetchParserListOnMount = (
const handleChange = (tag: string) => {
handleChunkMethodSelectChange(tag);
setSelectedTag(tag);
setSelectedTag(tag as DocumentParserType);
};
return { parserList: nextParserList, handleChange, selectedTag };
};
const hideAutoKeywords = ['qa', 'table', 'resume', 'knowledge_graph', 'tag'];
const hideAutoKeywords = [
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];
export const useShowAutoKeywords = () => {
const showAutoKeywords = useCallback((selectedTag: string) => {
return hideAutoKeywords.every((x) => selectedTag !== x);
}, []);
const showAutoKeywords = useCallback(
(selectedTag: DocumentParserType | undefined) => {
return hideAutoKeywords.every((x) => selectedTag !== x);
},
[],
);
return showAutoKeywords;
};

View File

@ -19,6 +19,7 @@ import omit from 'lodash/omit';
import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { IParserConfig } from '@/interfaces/database/document';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
@ -38,23 +39,23 @@ import styles from './index.less';
interface IProps extends Omit<IModalManagerChildrenProps, 'showModal'> {
loading: boolean;
onOk: (
parserId: string,
parserId: DocumentParserType | undefined,
parserConfig: IChangeParserConfigRequestBody,
) => void;
showModal?(): void;
parserId: string;
parserId: DocumentParserType;
parserConfig: IParserConfig;
documentExtension: string;
documentId: string;
}
const hidePagesChunkMethods = [
'qa',
'table',
'picture',
'resume',
'one',
'knowledge_graph',
DocumentParserType.Qa,
DocumentParserType.Table,
DocumentParserType.Picture,
DocumentParserType.Resume,
DocumentParserType.One,
DocumentParserType.KnowledgeGraph,
];
const ChunkMethodModal: React.FC<IProps> = ({
@ -95,22 +96,23 @@ const ChunkMethodModal: React.FC<IProps> = ({
return (
isPdf &&
hidePagesChunkMethods
.filter((x) => x !== 'one')
.filter((x) => x !== DocumentParserType.One)
.every((x) => x !== selectedTag)
);
}, [selectedTag, isPdf]);
const showMaxTokenNumber =
selectedTag === 'naive' || selectedTag === 'knowledge_graph';
selectedTag === DocumentParserType.Naive ||
selectedTag === DocumentParserType.KnowledgeGraph;
const hideDivider = [showPages, showOne, showMaxTokenNumber].every(
(x) => x === false,
);
const showEntityTypes = selectedTag === 'knowledge_graph';
const showEntityTypes = selectedTag === DocumentParserType.KnowledgeGraph;
const showExcelToHtml =
selectedTag === 'naive' && documentExtension === 'xlsx';
selectedTag === DocumentParserType.Naive && documentExtension === 'xlsx';
const showAutoKeywords = useShowAutoKeywords();
@ -284,7 +286,11 @@ const ChunkMethodModal: React.FC<IProps> = ({
{showMaxTokenNumber && (
<>
<MaxTokenNumber
max={selectedTag === 'knowledge_graph' ? 8192 * 2 : 2048}
max={
selectedTag === DocumentParserType.KnowledgeGraph
? 8192 * 2
: 2048
}
></MaxTokenNumber>
<Delimiter></Delimiter>
</>

View File

@ -1,12 +1,17 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { Form, Select, Switch } from 'antd';
import { upperFirst } from 'lodash';
import { useCallback, useMemo } from 'react';
import EntityTypesItem from '../entity-types-item';
const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];
export const showTagItems = (parserId: string) => {
export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId);
};
@ -16,16 +21,16 @@ const enum MethodValue {
}
export const excludedParseMethods = [
'table',
'resume',
'picture',
'knowledge_graph',
'qa',
'tag',
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.Picture,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Qa,
DocumentParserType.Tag,
];
export const showGraphRagItems = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
export const showGraphRagItems = (parserId: DocumentParserType | undefined) => {
return !excludedParseMethods.some((x) => x === parserId);
};
// The three types "table", "resume" and "one" do not display this configuration.

View File

@ -1,3 +1,4 @@
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { PlusOutlined } from '@ant-design/icons';
import {
@ -13,22 +14,28 @@ import {
import random from 'lodash/random';
export const excludedParseMethods = [
'table',
'resume',
'one',
'picture',
'knowledge_graph',
'qa',
'tag',
DocumentParserType.Table,
DocumentParserType.Resume,
DocumentParserType.One,
DocumentParserType.Picture,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Qa,
DocumentParserType.Tag,
];
export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
export const showRaptorParseConfiguration = (
parserId: DocumentParserType | undefined,
) => {
return !excludedParseMethods.some((x) => x === parserId);
};
export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
export const excludedTagParseMethods = [
DocumentParserType.Table,
DocumentParserType.KnowledgeGraph,
DocumentParserType.Tag,
];
export const showTagItems = (parserId: string) => {
export const showTagItems = (parserId: DocumentParserType) => {
return !excludedTagParseMethods.includes(parserId);
};

View File

@ -79,4 +79,5 @@ export enum DocumentParserType {
Audio = 'audio',
Email = 'email',
Tag = 'tag',
KnowledgeGraph = 'knowledge_graph',
}

View File

@ -16,6 +16,7 @@ import ParseConfiguration, {
import GraphRagItems, {
showGraphRagItems,
} from '@/components/parse-configuration/graph-rag-items';
import { DocumentParserType } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
import { normFile } from '@/utils/file-util';
@ -127,7 +128,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
return (
<>
{parserId === 'knowledge_graph' && (
{parserId === DocumentParserType.KnowledgeGraph && (
<>
<EntityTypesItem></EntityTypesItem>
<MaxTokenNumber max={8192 * 2}></MaxTokenNumber>
@ -140,7 +141,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
<AutoQuestionsItem></AutoQuestionsItem>
</>
)}
{parserId === 'naive' && (
{parserId === DocumentParserType.Naive && (
<>
<MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>