feat: Add delimiter field to naive parsing method #1909 (#1911)

### What problem does this PR solve?

feat: Add delimiter field to naive parsing method #1909

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2024-08-12 15:53:25 +08:00 committed by GitHub
parent ad48e8d915
commit 6baba54e9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 46 additions and 1 deletions

View File

@ -22,6 +22,7 @@ import React, { useEffect, useMemo } from 'react';
import { useFetchParserListOnMount } from './hooks'; import { useFetchParserListOnMount } from './hooks';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import Delimiter from '../delimiter';
import EntityTypesItem from '../entity-types-item'; import EntityTypesItem from '../entity-types-item';
import LayoutRecognize from '../layout-recognize'; import LayoutRecognize from '../layout-recognize';
import ParseConfiguration, { import ParseConfiguration, {
@ -268,7 +269,12 @@ const ChunkMethodModal: React.FC<IProps> = ({
} }
</Form.Item> </Form.Item>
)} )}
{showMaxTokenNumber && <MaxTokenNumber></MaxTokenNumber>} {showMaxTokenNumber && (
<>
<MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>
</>
)}
{showRaptorParseConfiguration(selectedTag) && ( {showRaptorParseConfiguration(selectedTag) && (
<ParseConfiguration></ParseConfiguration> <ParseConfiguration></ParseConfiguration>
)} )}

View File

@ -0,0 +1,34 @@
import { Form, Input } from 'antd';
import { useTranslation } from 'react-i18next';
/**
 * Props accepted by `DelimiterInput`: an optional `value` / `onChange` pair.
 * Both are optional, so the component can also be rendered without them.
 */
interface IProps {
/** Current delimiter string; may be absent. */
value?: string | undefined;
/** Invoked with the updated delimiter string each time the input changes. */
onChange?: (val: string | undefined) => void;
}
/**
 * Single-line input for editing a delimiter string.
 *
 * Newline characters in `value` are rendered as the visible two-character
 * sequence `\n`; when the user edits the text, every typed `\n` sequence is
 * turned back into a real newline before `onChange` is called.
 *
 * @param value - Current delimiter string; `undefined` leaves the input empty.
 * @param onChange - Optional callback that receives the converted string.
 */
const DelimiterInput = ({ value, onChange }: IProps) => {
  // Escape for display: a real newline cannot be shown in a one-line input.
  const displayed = value?.split('\n').join('\\n');

  const emitUnescaped = (event: React.ChangeEvent<HTMLInputElement>) => {
    // Unescape for storage: each backslash + "n" pair becomes a newline.
    onChange?.(event.target.value.split('\\n').join('\n'));
  };

  return <Input value={displayed} onChange={emitUnescaped} />;
};
/**
 * Form field for the delimiter setting, stored in the form at
 * `parser_config.delimiter`.
 *
 * The field is required, is labelled with the `knowledgeDetails.delimiter`
 * translation, and is edited through `DelimiterInput`, which displays newline
 * characters as the visible sequence `\n`.
 */
const Delimiter = () => {
  const { t } = useTranslation();

  // The default holds a REAL newline followed by the punctuation delimiters.
  // DelimiterInput reports real newlines after any edit, so the untouched
  // default now uses the same representation as an edited value. The text
  // shown to the user is unchanged, because DelimiterInput renders the
  // newline as `\n`.
  // NOTE(review): the consumer of parser_config.delimiter is not visible
  // here — confirm it expects a real newline rather than an escaped one.
  const defaultDelimiter = '\n!?;。;!?';

  return (
    <Form.Item
      name={['parser_config', 'delimiter']}
      label={t('knowledgeDetails.delimiter')}
      initialValue={defaultDelimiter}
      rules={[{ required: true }]}
    >
      <DelimiterInput />
    </Form.Item>
  );
};
export default Delimiter;

View File

@ -148,6 +148,7 @@ export default {
rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`, rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`,
topK: 'Top-K', topK: 'Top-K',
topKTip: `K chunks will be fed into rerank models.`, topKTip: `K chunks will be fed into rerank models.`,
delimiter: `Delimiter`,
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
titleDescription: titleDescription:

View File

@ -144,6 +144,7 @@ export default {
rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則它使用rerank評分代替矢量餘弦相似性。`, rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則它使用rerank評分代替矢量餘弦相似性。`,
topK: 'Top-K', topK: 'Top-K',
topKTip: `K塊將被送入Rerank型號。`, topKTip: `K塊將被送入Rerank型號。`,
delimiter: `分段標識符`,
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。', titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',

View File

@ -145,6 +145,7 @@ export default {
rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则它使用rerank评分代替矢量余弦相似性。`, rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则它使用rerank评分代替矢量余弦相似性。`,
topK: 'Top-K', topK: 'Top-K',
topKTip: `K块将被送入Rerank型号。`, topKTip: `K块将被送入Rerank型号。`,
delimiter: `分段标识符`,
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。', titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',

View File

@ -1,3 +1,4 @@
import Delimiter from '@/components/delimiter';
import EntityTypesItem from '@/components/entity-types-item'; import EntityTypesItem from '@/components/entity-types-item';
import LayoutRecognize from '@/components/layout-recognize'; import LayoutRecognize from '@/components/layout-recognize';
import MaxTokenNumber from '@/components/max-token-number'; import MaxTokenNumber from '@/components/max-token-number';
@ -111,6 +112,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
{parserId === 'naive' && ( {parserId === 'naive' && (
<> <>
<MaxTokenNumber></MaxTokenNumber> <MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>
<LayoutRecognize></LayoutRecognize> <LayoutRecognize></LayoutRecognize>
</> </>
)} )}