From 49cebd9fec3105130e55d987ae87461667debd81 Mon Sep 17 00:00:00 2001
From: balibabu
Date: Tue, 7 Jan 2025 19:33:53 +0800
Subject: [PATCH] Feat: Add description for tag parsing method #4368 (#4402)
### What problem does this PR solve?
Feat: Add description for tag parsing method #4368
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
---
web/src/assets/svg/chunk-method/tag-01.svg | 84 +++++++++++++++++++
web/src/assets/svg/chunk-method/tag-02.svg | 84 +++++++++++++++++++
web/src/locales/en.ts | 14 +++-
web/src/locales/zh-traditional.ts | 14 +++-
web/src/locales/zh.ts | 14 +++-
.../components/knowledge-setting/tag-tabs.tsx | 46 +++++++---
.../components/knowledge-setting/utils.ts | 1 +
.../form/generate-form/dynamic-parameters.tsx | 5 +-
8 files changed, 244 insertions(+), 18 deletions(-)
create mode 100644 web/src/assets/svg/chunk-method/tag-01.svg
create mode 100644 web/src/assets/svg/chunk-method/tag-02.svg
diff --git a/web/src/assets/svg/chunk-method/tag-01.svg b/web/src/assets/svg/chunk-method/tag-01.svg
new file mode 100644
index 000000000..0ed6351ee
--- /dev/null
+++ b/web/src/assets/svg/chunk-method/tag-01.svg
@@ -0,0 +1,84 @@
+
\ No newline at end of file
diff --git a/web/src/assets/svg/chunk-method/tag-02.svg b/web/src/assets/svg/chunk-method/tag-02.svg
new file mode 100644
index 000000000..b8498d6ab
--- /dev/null
+++ b/web/src/assets/svg/chunk-method/tag-02.svg
@@ -0,0 +1,84 @@
+
\ No newline at end of file
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 788862673..a42daa827 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -286,6 +286,16 @@ export default {
This approach chunks files using the 'naive'/'General' method. It splits a document into segments and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.
The chunks are then fed to the LLM to extract entities and relationships for a knowledge graph and a mind map.
Ensure that you set the Entity types.
`,
+ tag: `
A knowledge base using 'Tag' as its chunking method is supposed to be used by other knowledge bases to add tags to their chunks; queries to those knowledge bases will also be tagged.
+
A knowledge base using 'Tag' as its chunking method is NOT supposed to be involved in the RAG procedure.
+
The chunks in this knowledge base are examples of tags, which demonstrate the entire tag set and the relevance between chunks and tags.
+
+
This chunk method supports EXCEL and CSV/TXT file formats.
+
If a file is in Excel format, it should contain two columns without headers: one for content and the other for tags, with the content column preceding the tags column. Multiple sheets are acceptable, provided the columns are properly structured.
+
If a file is in CSV/TXT format, it must be UTF-8 encoded with TAB as the delimiter to separate content and tags.
+
In the tags column, tags are separated by English commas.
+Lines of text that fail to follow the above rules will be ignored, and each pair will be considered a distinct chunk.
+`,
useRaptor: 'Use RAPTOR to enhance retrieval',
useRaptorTip:
'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information.',
@@ -310,9 +320,11 @@ The above is the content you need to summarize.`,
vietnamese: 'Vietnamese',
pageRank: 'Page rank',
pageRankTip: `This increases the relevance score of the knowledge base. Its value will be added to the relevance score of all retrieved chunks from this knowledge base. Useful when you are searching within multiple knowledge bases and wanting to assign a higher pagerank score to a specific one.`,
- tag: 'Tag',
+ tagName: 'Tag',
frequency: 'Frequency',
searchTags: 'Search tags',
+ tagCloud: 'Cloud',
+ tagTable: 'Table',
},
chunk: {
chunk: 'Chunk',
diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts
index 50a59c7dc..26dcfa4b7 100644
--- a/web/src/locales/zh-traditional.ts
+++ b/web/src/locales/zh-traditional.ts
@@ -271,6 +271,16 @@ export default {