feat: Configurable for excel, html table or row based text #2516 (#2538)

### What problem does this PR solve?

feat: Make Excel parsing configurable (HTML table or row-based text) #2516

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This commit is contained in:
balibabu 2024-09-23 14:58:51 +08:00 committed by GitHub
parent db1be22a2f
commit dd019e7ba1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 32 additions and 0 deletions

View File

@ -24,6 +24,7 @@ import { useFetchParserListOnMount } from './hooks';
import { useTranslate } from '@/hooks/common-hooks';
import Delimiter from '../delimiter';
import EntityTypesItem from '../entity-types-item';
import ExcelToHtml from '../excel-to-html';
import LayoutRecognize from '../layout-recognize';
import ParseConfiguration, {
showRaptorParseConfiguration,
@ -104,6 +105,9 @@ const ChunkMethodModal: React.FC<IProps> = ({
const showEntityTypes = selectedTag === 'knowledge_graph';
const showExcelToHtml =
selectedTag === 'naive' && documentExtension === 'xlsx';
const afterClose = () => {
form.resetFields();
};
@ -279,6 +283,7 @@ const ChunkMethodModal: React.FC<IProps> = ({
<Delimiter></Delimiter>
</>
)}
{showExcelToHtml && <ExcelToHtml></ExcelToHtml>}
{showRaptorParseConfiguration(selectedTag) && (
<ParseConfiguration></ParseConfiguration>
)}

View File

@ -0,0 +1,19 @@
import { useTranslate } from '@/hooks/common-hooks';
import { Form, Switch } from 'antd';
/**
 * Form field that toggles the `html4excel` entry of `parser_config`.
 *
 * Renders an antd `Switch` wrapped in a `Form.Item`. The field starts out
 * as `false`, and its label and tooltip come from the `t` function that
 * `useTranslate('knowledgeDetails')` returns.
 */
function ExcelToHtml() {
  const { t } = useTranslate('knowledgeDetails');
  // Nested form path: the value is stored under parser_config.html4excel.
  const fieldPath = ['parser_config', 'html4excel'];

  return (
    <Form.Item
      label={t('html4excel')}
      tooltip={t('html4excelTip')}
      name={fieldPath}
      // Switch exposes its state through `checked` rather than `value`.
      valuePropName="checked"
      initialValue={false}
    >
      <Switch />
    </Form.Item>
  );
}
export default ExcelToHtml;

View File

@ -150,6 +150,8 @@ export default {
topK: 'Top-K',
topKTip: `K chunks will be fed into rerank models.`,
delimiter: `Delimiter`,
html4excel: 'Excel to HTML',
html4excelTip: `Excel will be parsed into HTML table or not. If it's FALSE, every row in Excel will be formed as a chunk.`,
},
knowledgeConfiguration: {
titleDescription:

View File

@ -146,6 +146,8 @@ export default {
topK: 'Top-K',
topKTip: `K塊將被送入Rerank型號。`,
delimiter: `分段標識符`,
html4excel: '表格轉HTML',
html4excelTip: `Excel 是否會被解析為 HTML 表格。如果為 FALSE，Excel 中的每一行都會形成一個區塊。`,
},
knowledgeConfiguration: {
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',

View File

@ -147,6 +147,8 @@ export default {
topK: 'Top-K',
topKTip: `K块将被送入Rerank型号。`,
delimiter: `分段标识符`,
html4excel: '表格转HTML',
html4excelTip: `Excel 是否将被解析为 HTML 表。如果为 FALSE，Excel 中的每一行都将形成一个块。`,
},
knowledgeConfiguration: {
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',

View File

@ -1,5 +1,6 @@
import Delimiter from '@/components/delimiter';
import EntityTypesItem from '@/components/entity-types-item';
import ExcelToHtml from '@/components/excel-to-html';
import LayoutRecognize from '@/components/layout-recognize';
import MaxTokenNumber from '@/components/max-token-number';
import ParseConfiguration, {
@ -124,6 +125,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
<MaxTokenNumber></MaxTokenNumber>
<Delimiter></Delimiter>
<LayoutRecognize></LayoutRecognize>
<ExcelToHtml></ExcelToHtml>
</>
)}
{showRaptorParseConfiguration(parserId) && (