Feat: Add description for tag parsing method #4368 (#4402)

### What problem does this PR solve?

Feat: Add description for tag parsing method #4368

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2025-01-07 19:33:53 +08:00 committed by GitHub
parent d9a4e4cc3b
commit 49cebd9fec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 244 additions and 18 deletions

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 921 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 371 KiB

View File

@ -286,6 +286,16 @@ export default {
<p>This approach chunks files using the 'naive'/'General' method. It splits a document into segments and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.</p>
<p>The chunks are then fed to the LLM to extract entities and relationships for a knowledge graph and a mind map.</p>
<p>Ensure that you set the <b>Entity types</b>.</p>`,
tag: `<p>A knowledge base using 'Tag' as its chunking method is meant to be used by other knowledge bases to add tags to their chunks; queries against those chunks will carry tags as well.</p>
<p>A knowledge base using 'Tag' as its chunking method is <b>NOT</b> meant to be involved in the RAG procedure.</p>
<p>The chunks in this knowledge base are examples of tags, which demonstrate the entire tag set and the relevance between chunk and tags.</p>
<p>This chunk method supports <b>EXCEL</b> and <b>CSV/TXT</b> file formats.</p>
<p>If a file is in <b>Excel</b> format, it should contain two columns without headers: one for content and the other for tags, with the content column preceding the tags column. Multiple sheets are acceptable, provided the columns are properly structured.</p>
<p>If a file is in <b>CSV/TXT</b> format, it must be UTF-8 encoded with TAB as the delimiter to separate content and tags.</p>
<p>In the tags column, tags are separated by English <b>commas</b>.</p>
<i>Lines of text that fail to follow the above rules will be ignored, and each pair will be considered a distinct chunk.</i>
`,
useRaptor: 'Use RAPTOR to enhance retrieval',
useRaptorTip:
'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information.',
@ -310,9 +320,11 @@ The above is the content you need to summarize.`,
vietnamese: 'Vietnamese',
pageRank: 'Page rank',
pageRankTip: `This increases the relevance score of the knowledge base. Its value will be added to the relevance score of all retrieved chunks from this knowledge base. Useful when you are searching within multiple knowledge bases and wanting to assign a higher pagerank score to a specific one.`,
tag: 'Tag',
tagName: 'Tag',
frequency: 'Frequency',
searchTags: 'Search tags',
tagCloud: 'Cloud',
tagTable: 'Table',
},
chunk: {
chunk: 'Chunk',

View File

@ -271,6 +271,16 @@ export default {
<p>LLM以提取知識圖譜和思維導圖的節點和關係
<p></p></p>`,
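tag: `<p>使用「標籤」作為分塊方法的知識庫應該被其他知識庫用來將標籤加入其區塊中,查詢也將帶有標籤。</p>
<p>使用「標籤」作為分塊方法的知識庫<b>不</b>應該參與 RAG 流程。</p>
<p>此知識庫中的區塊是標籤的範例,展示了整個標籤集以及區塊與標籤之間的相關性。</p>
<p>此分塊方法支援<b>EXCEL</b>和<b>CSV/TXT</b>檔案格式。</p>
<p>如果檔案為 <b>Excel</b> 格式,則應包含兩欄且不帶標題:一欄為內容,另一欄為標籤,內容欄位於標籤欄之前。可以包含多個工作表,只要各欄結構正確即可。</p>
<p>如果檔案為<b>CSV/TXT</b>格式,則必須使用UTF-8編碼,並以TAB作為分隔符號來分隔內容和標籤。</p>
<p>在標籤欄中,標籤之間以英文<b>逗號</b>分隔。</p>
<i>不符合上述規則的文字行將被忽略,每一對內容與標籤將被視為一個獨立的區塊。</i>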
`,
useRaptor: '使用RAPTOR文件增強策略',
useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
prompt: '提示詞',
@ -294,9 +304,11 @@ export default {
pageRank: '頁面排名',
pageRankTip: `這用來提高相關性分數。所有檢索到的區塊的相關性得分將加上該數字。
pagerank `,
tag: '標籤',
tagName: '標籤',
frequency: '頻次',
searchTags: '搜尋標籤',
tagCloud: '雲端',
tagTable: '表',
},
chunk: {
chunk: '解析塊',

View File

@ -288,6 +288,16 @@ export default {
<p> LLM </p>
</p>`,
tag: `<p>使用“标签”作为分块方法的知识库应该被其他知识库使用,以将标签添加到其块中,对这些块的查询也将带有标签。</p>
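<p>使用“标签”作为分块方法的知识库<b>不</b>应该参与 RAG 过程。</p>
<p>此知识库中的块是标签的示例,它们演示了整个标签集以及块和标签之间的相关性。</p>
<p>此分块方法支持<b>EXCEL</b>和<b>CSV/TXT</b>文件格式。</p>
<p>如果文件为<b>Excel</b>格式,则应包含两列且不带标题:一列用于内容,另一列用于标签,内容列位于标签列之前。可以包含多个工作表,只要各列结构正确即可。</p>
<p>如果文件为 <b>CSV/TXT</b> 格式,则必须使用 UTF-8 编码,并以 TAB 作为分隔符来分隔内容和标签。</p>
<p>在标签列中,标签之间使用英文 <b>逗号</b>分隔。</p>
<i>不符合上述规则的文本行将被忽略,并且每一对内容和标签将被视为一个独立的块。</i>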
`,
useRaptor: '使用召回增强RAPTOR策略',
useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
prompt: '提示词',
@ -311,9 +321,11 @@ export default {
pageRank: '页面排名',
pageRankTip: `这用于提高相关性得分。所有检索到的块的相关性得分将加上此数字。
pagerank `,
tag: '标签',
tagName: '标签',
frequency: '频次',
searchTags: '搜索标签',
tagCloud: '云',
tagTable: '表',
},
chunk: {
chunk: '解析块',

View File

@ -1,20 +1,40 @@
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
import { Segmented } from 'antd';
import { SegmentedLabeledOption } from 'antd/es/segmented';
import { upperFirst } from 'lodash';
import { useState } from 'react';
import { useTranslation } from 'react-i18next';
import { TagTable } from './tag-table';
import { TagWordCloud } from './tag-word-cloud';
/**
 * The two presentations offered for the tag set. The string values double as
 * the `Segmented` option values and as the suffix used to build i18n keys.
 */
const TagType = {
  Cloud: 'cloud',
  Table: 'table',
} as const;
// Same name in the type namespace so `TagType` keeps working as an annotation.
type TagType = (typeof TagType)[keyof typeof TagType];
/** Lookup from each tag view mode to the element that renders it. */
const TagContentMap = {
  [TagType.Cloud]: <TagWordCloud />,
  [TagType.Table]: <TagTable />,
};
/**
 * Lets the user switch between the word-cloud and table views of the tags.
 *
 * Renders an antd `Segmented` control followed by the element mapped to the
 * currently selected mode in `TagContentMap`. The word cloud is selected
 * initially.
 *
 * The stale `<Tabs defaultValue="account">` markup was removed: it sat next to
 * the `<section>` as a second JSX root (invalid JSX), used hard-coded English
 * labels, and ignored the `value` state.
 */
export function TagTabs() {
  const [value, setValue] = useState<TagType>(TagType.Cloud);
  const { t } = useTranslation();

  // Label keys are derived from the mode value: 'cloud' -> 'tagCloud',
  // 'table' -> 'tagTable', looked up under `knowledgeConfiguration`.
  const options: SegmentedLabeledOption[] = [TagType.Cloud, TagType.Table].map(
    (x) => ({
      label: t(`knowledgeConfiguration.tag${upperFirst(x)}`),
      value: x,
    }),
  );

  return (
    <section className="mt-4">
      <Segmented
        value={value}
        options={options}
        // Segmented reports a generic value; the options above only ever
        // carry TagType members, so the narrowing is safe.
        onChange={(val) => setValue(val as TagType)}
      />
      {TagContentMap[value]}
    </section>
  );
}

View File

@ -16,4 +16,5 @@ export const ImageMap = {
table: getImageName('table', 2),
one: getImageName('one', 2),
knowledge_graph: getImageName('knowledge-graph', 2),
tag: getImageName('tag', 2),
};

View File

@ -1,9 +1,10 @@
import { EditableCell, EditableRow } from '@/components/editable-cell';
import { useTranslate } from '@/hooks/common-hooks';
import { RAGFlowNodeType } from '@/interfaces/database/flow';
import { DeleteOutlined } from '@ant-design/icons';
import { Button, Flex, Select, Table, TableProps } from 'antd';
import { useBuildComponentIdSelectOptions } from '../../hooks/use-get-begin-query';
import { IGenerateParameter, RAGFlowNodeType } from '../../interface';
import { IGenerateParameter } from '../../interface';
import { useHandleOperateParameters } from './hooks';
import styles from './index.less';
@ -46,7 +47,7 @@ const DynamicParameters = ({ node }: IProps) => {
}),
},
{
title: t('componentId'),
title: t('value'),
dataIndex: 'component_id',
key: 'component_id',
align: 'center',