Feat: Add description for tag parsing method #4368 (#4402)

### What problem does this PR solve?

Feat: Add description for tag parsing method #4368

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-08-14 01:35:59 +08:00 · 2025-01-07 19:33:53 +08:00 · 2025-01-07 19:33:53 +08:00 · 49cebd9fec
commit 49cebd9fec
parent d9a4e4cc3b
8 changed files with 244 additions and 18 deletions
--- a/web/src/assets/svg/chunk-method/tag-01.svg
+++ b/web/src/assets/svg/chunk-method/tag-01.svg
--- a/web/src/assets/svg/chunk-method/tag-02.svg
+++ b/web/src/assets/svg/chunk-method/tag-02.svg
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@ -286,6 +286,16 @@ export default {
 <p>This approach chunks files using the 'naive'/'General' method. It splits a document into segments and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.</p>
 <p>The chunks are then fed to the LLM to extract entities and relationships for a knowledge graph and a mind map.</p>
 <p>Ensure that you set the <b>Entity types</b>.</p>`,
+      tag: `<p>A knowledge base using 'Tag' as a chunking method is supposed to be used by other knowledge bases to add tags to their chunks; queries against those chunks will also carry tags.</p>
+<p>A knowledge base using 'Tag' as a chunking method is <b>NOT</b> supposed to be involved in the RAG procedure.</p>
+<p>The chunks in this knowledge base are examples of tags, which demonstrate the entire tag set and the relevance between chunk and tags.</p>
+
+<p>This chunk method supports <b>EXCEL</b> and <b>CSV/TXT</b> file formats.</p>
+<p>If a file is in <b>Excel</b> format, it should contain two columns without headers: one for content and the other for tags, with the content column preceding the tags column. Multiple sheets are acceptable, provided the columns are properly structured.</p>
+<p>If a file is in <b>CSV/TXT</b> format, it must be UTF-8 encoded with TAB as the delimiter to separate content and tags.</p>
+<p>In the tags column, tags are separated by English <b>commas</b>.</p>
+<i>Lines of text that fail to follow the above rules will be ignored, and each content-tag pair will be considered a distinct chunk.</i>
+`,
      useRaptor: 'Use RAPTOR to enhance retrieval',
      useRaptorTip:
        'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information.',
@ -310,9 +320,11 @@ The above is the content you need to summarize.`,
      vietnamese: 'Vietnamese',
      pageRank: 'Page rank',
      pageRankTip: `This increases the relevance score of the knowledge base. Its value will be added to the relevance score of all retrieved chunks from this knowledge base. Useful when you are searching within multiple knowledge bases and wanting to assign a higher pagerank score to a specific one.`,
-      tag: 'Tag',
+      tagName: 'Tag',
      frequency: 'Frequency',
      searchTags: 'Search tags',
+      tagCloud: 'Cloud',
+      tagTable: 'Table',
    },
    chunk: {
      chunk: 'Chunk',
--- a/web/src/locales/zh-traditional.ts
+++ b/web/src/locales/zh-traditional.ts
@ -271,6 +271,16 @@ export default {
 <p>接下來，區塊將傳送到LLM以提取知識圖譜和思維導圖的節點和關係。

 <p>請注意您需要指定的條目類型。</p></p>`,
+      tag: `<p>使用「標籤」作為分塊方法的知識庫應該被其他知識庫用來將標籤加入其區塊中，查詢也將帶有標籤。</p>
+<p>使用「標籤」作為分塊方法的知識庫<b>不</b>應該參與 RAG 過程。</p>
+<p>本知識庫中的區塊是標籤的範例，展示了整個標籤集以及區塊與標籤之間的相關性。</p>

+<p>此區塊方法支援<b>EXCEL</b>和<b>CSV/TXT</b>檔案格式。</p>
+<p>如果檔案採用 <b>Excel</b> 格式，則應包含兩列，不含標題：一列用於內容，另一列用於標籤，內容列位於標籤列之前。只要列的結構正確，多個工作表也是可以接受的。</p>
+<p>如果檔案為<b>CSV/TXT</b>格式，則必須採用UTF-8編碼，並以TAB作為分隔符號來分隔內容和標籤。</p>
+<p>標籤欄中，標籤之間有英文<b>逗號</b>。</p>
+<i>不符合上述規則的文字行將被忽略，並且每一對將被視為一個不同的區塊。</i>
+`,
      useRaptor: '使用RAPTOR文件增強策略',
      useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
      prompt: '提示詞',
@ -294,9 +304,11 @@ export default {
      pageRank: '頁面排名',
      pageRankTip: `這用來提高相關性分數。所有檢索到的區塊的相關性得分將加上該數字。
 當您想要先搜尋給定的知識庫時，請設定比其他人更高的 pagerank 分數。`,
-      tag: '標籤',
+      tagName: '標籤',
      frequency: '頻次',
      searchTags: '搜尋標籤',
+      tagCloud: '雲端',
+      tagTable: '表',
    },
    chunk: {
      chunk: '解析塊',
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@ -288,6 +288,16 @@ export default {
 <p>接下来，将分块传输到 LLM 以提取知识图谱和思维导图的节点和关系。</p>

 注意您需要指定的条目类型。</p>`,
+      tag: `<p>使用“标签”作为分块方法的知识库应该被其他知识库使用，以将标签添加到其块中，对这些块的查询也将带有标签。</p>
+<p>使用“标签”作为分块方法的知识库<b>不</b>应该参与 RAG 过程。</p>
+<p>此知识库中的块是标签的示例，它们演示了整个标签集以及块和标签之间的相关性。</p>
+
+<p>此块方法支持<b>EXCEL</b>和<b>CSV/TXT</b>文件格式。</p>
+<p>如果文件为<b>Excel</b>格式，则它应该包含两列无标题：一列用于内容，另一列用于标签，内容列位于标签列之前。可以接受多个工作表，只要列结构正确即可。</p>
+<p>如果文件为 <b>CSV/TXT</b> 格式，则必须使用 UTF-8 编码并以 TAB 作为分隔符来分隔内容和标签。</p>
+<p>在标签列中，标签之间使用英文 <b>逗号</b>。</p>
+<i>不符合上述规则的文本行将被忽略，并且每对文本将被视为一个不同的块。</i>
+`,
      useRaptor: '使用召回增强RAPTOR策略',
      useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
      prompt: '提示词',
@ -311,9 +321,11 @@ export default {
      pageRank: '页面排名',
      pageRankTip: `这用于提高相关性得分。所有检索到的块的相关性得分将加上此数字。
 当您想首先搜索给定的知识库时，请设置比其他知识库更高的 pagerank 得分。`,
-      tag: '标签',
+      tagName: '标签',
      frequency: '频次',
      searchTags: '搜索标签',
+      tagCloud: '云',
+      tagTable: '表',
    },
    chunk: {
      chunk: '解析块',
--- a/web/src/pages/add-knowledge/components/knowledge-setting/tag-tabs.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/tag-tabs.tsx
@ -1,20 +1,40 @@
-import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
+import { Segmented } from 'antd';
+import { SegmentedLabeledOption } from 'antd/es/segmented';
+import { upperFirst } from 'lodash';
+import { useState } from 'react';
+import { useTranslation } from 'react-i18next';
 import { TagTable } from './tag-table';
 import { TagWordCloud } from './tag-word-cloud';

+enum TagType {
+  Cloud = 'cloud',
+  Table = 'table',
+}
+
+const TagContentMap = {
+  [TagType.Cloud]: <TagWordCloud></TagWordCloud>,
+  [TagType.Table]: <TagTable></TagTable>,
+};
+
 export function TagTabs() {
+  const [value, setValue] = useState<TagType>(TagType.Cloud);
+  const { t } = useTranslation();
+
+  const options: SegmentedLabeledOption[] = [TagType.Cloud, TagType.Table].map(
+    (x) => ({
+      label: t(`knowledgeConfiguration.tag${upperFirst(x)}`),
+      value: x,
+    }),
+  );
+
  return (
-    <Tabs defaultValue="account" className="mt-4">
-      <TabsList>
-        <TabsTrigger value="account">Word cloud</TabsTrigger>
-        <TabsTrigger value="password">Table</TabsTrigger>
-      </TabsList>
-      <TabsContent value="account">
-        <TagWordCloud></TagWordCloud>
-      </TabsContent>
-      <TabsContent value="password">
-        <TagTable></TagTable>
-      </TabsContent>
-    </Tabs>
+    <section className="mt-4">
+      <Segmented
+        value={value}
+        options={options}
+        onChange={(val) => setValue(val as TagType)}
+      />
+      {TagContentMap[value]}
+    </section>
  );
 }
--- a/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/utils.ts
@ -16,4 +16,5 @@ export const ImageMap = {
  table: getImageName('table', 2),
  one: getImageName('one', 2),
  knowledge_graph: getImageName('knowledge-graph', 2),
+  tag: getImageName('tag', 2),
 };
--- a/web/src/pages/flow/form/generate-form/dynamic-parameters.tsx
+++ b/web/src/pages/flow/form/generate-form/dynamic-parameters.tsx
@ -1,9 +1,10 @@
 import { EditableCell, EditableRow } from '@/components/editable-cell';
 import { useTranslate } from '@/hooks/common-hooks';
+import { RAGFlowNodeType } from '@/interfaces/database/flow';
 import { DeleteOutlined } from '@ant-design/icons';
 import { Button, Flex, Select, Table, TableProps } from 'antd';
 import { useBuildComponentIdSelectOptions } from '../../hooks/use-get-begin-query';
-import { IGenerateParameter, RAGFlowNodeType } from '../../interface';
+import { IGenerateParameter } from '../../interface';
 import { useHandleOperateParameters } from './hooks';

 import styles from './index.less';
@ -46,7 +47,7 @@ const DynamicParameters = ({ node }: IProps) => {
      }),
    },
    {
-      title: t('componentId'),
+      title: t('value'),
      dataIndex: 'component_id',
      key: 'component_id',
      align: 'center',