From dd019e7ba1573ac1a1dd85d9112ad64f2e11de8b Mon Sep 17 00:00:00 2001 From: balibabu Date: Mon, 23 Sep 2024 14:58:51 +0800 Subject: [PATCH] feat: Configurable for excel, html table or row based text #2516 (#2538) ### What problem does this PR solve? feat: Configurable for excel, html table or row based text #2516 ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): --- .../components/chunk-method-modal/index.tsx | 5 +++++ web/src/components/excel-to-html.tsx | 19 +++++++++++++++++++ web/src/locales/en.ts | 2 ++ web/src/locales/zh-traditional.ts | 2 ++ web/src/locales/zh.ts | 2 ++ .../knowledge-setting/configuration.tsx | 2 ++ 6 files changed, 32 insertions(+) create mode 100644 web/src/components/excel-to-html.tsx diff --git a/web/src/components/chunk-method-modal/index.tsx b/web/src/components/chunk-method-modal/index.tsx index 9b20d3fab..876de5e0f 100644 --- a/web/src/components/chunk-method-modal/index.tsx +++ b/web/src/components/chunk-method-modal/index.tsx @@ -24,6 +24,7 @@ import { useFetchParserListOnMount } from './hooks'; import { useTranslate } from '@/hooks/common-hooks'; import Delimiter from '../delimiter'; import EntityTypesItem from '../entity-types-item'; +import ExcelToHtml from '../excel-to-html'; import LayoutRecognize from '../layout-recognize'; import ParseConfiguration, { showRaptorParseConfiguration, @@ -104,6 +105,9 @@ const ChunkMethodModal: React.FC = ({ const showEntityTypes = selectedTag === 'knowledge_graph'; + const showExcelToHtml = + selectedTag === 'naive' && documentExtension === 'xlsx'; + const afterClose = () => { form.resetFields(); }; @@ -279,6 +283,7 @@ const ChunkMethodModal: React.FC = ({ )} + {showExcelToHtml && } {showRaptorParseConfiguration(selectedTag) && ( )} diff --git a/web/src/components/excel-to-html.tsx b/web/src/components/excel-to-html.tsx new file mode 100644 index 000000000..e236a12f3 --- /dev/null +++ b/web/src/components/excel-to-html.tsx @@ -0,0 +1,19 @@ +import { useTranslate } from '@/hooks/common-hooks'; +import { Form, Switch } from 'antd'; + +const ExcelToHtml = () => { + const { t } = useTranslate('knowledgeDetails'); + return ( + + + + ); +}; + +export default ExcelToHtml; diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index ade4385aa..ed0586f88 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -150,6 +150,8 @@ export default { topK: 'Top-K', topKTip: `K chunks will be fed into rerank models.`, delimiter: `Delimiter`, + html4excel: 'Excel to HTML', + html4excelTip: `Excel will be parsed into HTML table or not. If it's FALSE, every row in Excel will be formed as a chunk.`, }, knowledgeConfiguration: { titleDescription: diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 2060907c3..04589bd6b 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -146,6 +146,8 @@ export default { topK: 'Top-K', topKTip: `K塊將被送入Rerank型號。`, delimiter: `分段標識符`, + html4excel: '表格轉HTML', + html4excelTip: `Excel 是否會被解析為 HTML 表格。如果為 FALSE,Excel 中的每一行都會形成一個區塊。`, }, knowledgeConfiguration: { titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 038945325..796774d7b 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -147,6 +147,8 @@ export default { topK: 'Top-K', topKTip: `K块将被送入Rerank型号。`, delimiter: `分段标识符`, + html4excel: '表格转HTML', + html4excelTip: `Excel 是否将被解析为 HTML 表。如果为 FALSE,Excel 中的每一行都将形成一个块。`, }, knowledgeConfiguration: { titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。', diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx b/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx index 7e55f7608..64b1cbb82 100644 --- a/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx @@ -1,5 +1,6 @@ import Delimiter from '@/components/delimiter'; import EntityTypesItem from '@/components/entity-types-item'; +import ExcelToHtml from '@/components/excel-to-html'; import LayoutRecognize from '@/components/layout-recognize'; import MaxTokenNumber from '@/components/max-token-number'; import ParseConfiguration, { @@ -124,6 +125,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => { + )} {showRaptorParseConfiguration(parserId) && (