diff --git a/web/src/components/parse-configuration/index.tsx b/web/src/components/parse-configuration/index.tsx
index 7786eddcb..b944b9712 100644
--- a/web/src/components/parse-configuration/index.tsx
+++ b/web/src/components/parse-configuration/index.tsx
@@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
};
+export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];
+// Returns true unless parserId is one of the ids listed in excludedTagParseMethods.
+export const showTagItems = (parserId: string) => {
+ return !excludedTagParseMethods.includes(parserId);
+};
+
// The three types "table", "resume" and "one" do not display this configuration.
const ParseConfiguration = () => {
const form = Form.useFormInstance();
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index a42daa827..d3f5b2685 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -325,6 +325,19 @@ The above is the content you need to summarize.`,
searchTags: 'Search tags',
tagCloud: 'Cloud',
tagTable: 'Table',
+ tagSet: 'Tag set',
+ tagSetTip: `
+ Selecting the 'Tag' knowledge bases helps to tag every chunk.
+Queries against those chunks will also carry tags.
+This procedure will improve precision of retrieval by adding more information to the dataset, especially when there's a large set of chunks.
+Difference between tags and keywords:
+
+ - Tags form a closed set that is defined and managed by the user, whereas keywords form an open set.
+ - You need to upload tag sets with samples prior to use.
+ - Keywords are generated by LLM which is expensive and time consuming.
+
+ `,
+ topnTags: 'Top-N Tags',
},
chunk: {
chunk: 'Chunk',
diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts
index 26dcfa4b7..d1db43cf5 100644
--- a/web/src/locales/zh-traditional.ts
+++ b/web/src/locales/zh-traditional.ts
@@ -309,6 +309,19 @@ export default {
searchTags: '搜尋標籤',
tagCloud: '雲端',
tagTable: '表',
+ tagSet: '標籤庫',
+ topnTags: 'Top-N 標籤',
+ tagSetTip: `
+ 選擇「標籤」知識庫有助於標記每個區塊。
+對這些區塊的查詢也將帶有標籤。
+此過程將透過向資料集添加更多資訊來提高檢索精度,特別是當存在大量區塊時。
+標籤和關鍵字的差異:
+
+ - 標籤是一個閉集,由使用者定義和操作,而關鍵字是一個開集。
+ - 您需要在使用前上傳包含範例的標籤集。
+ - 關鍵字由 LLM 生成,既昂貴又耗時。
+
+ `,
},
chunk: {
chunk: '解析塊',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index b5f7d415e..ecc9ef140 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -326,6 +326,19 @@ export default {
searchTags: '搜索标签',
tagCloud: '云',
tagTable: '表',
+ tagSet: '标签库',
+ topnTags: 'Top-N 标签',
+ tagSetTip: `
+ 选择“标签”知识库有助于标记每个块。
+ 对这些块的查询也将带有标签。
+ 此过程将通过向数据集添加更多信息来提高检索的准确性,尤其是在存在大量块的情况下。
+ 标签和关键字之间的区别:
+
+ - 标签是一个由用户定义和操作的封闭集,而关键字是一个开放集。
+ - 您需要在使用前上传带有样本的标签集。
+ - 关键字由 LLM 生成,这既昂贵又耗时。
+
+ `,
},
chunk: {
chunk: '解析块',
diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx b/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx
index 01a7b34da..cf1748c81 100644
--- a/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx
@@ -13,6 +13,7 @@ type FieldType = {
interface kFProps {
doc_id: string;
chunkId: string | undefined;
+ parserId: string;
}
const ChunkCreatingModal: React.FC & kFProps> = ({
@@ -21,15 +22,19 @@ const ChunkCreatingModal: React.FC & kFProps> = ({
hideModal,
onOk,
loading,
+ parserId,
}) => {
const [form] = Form.useForm();
const [checked, setChecked] = useState(false);
const [keywords, setKeywords] = useState([]);
const [question, setQuestion] = useState([]);
+ const [tagKeyWords, setTagKeyWords] = useState([]);
const { removeChunk } = useDeleteChunkByIds();
const { data } = useFetchChunk(chunkId);
const { t } = useTranslation();
+ const isTagParser = parserId === 'tag';
+
useEffect(() => {
if (data?.code === 0) {
const {
@@ -37,16 +42,19 @@ const ChunkCreatingModal: React.FC & kFProps> = ({
important_kwd = [],
available_int,
question_kwd = [],
+ tag_kwd = [],
} = data.data;
form.setFieldsValue({ content: content_with_weight });
setKeywords(important_kwd);
setQuestion(question_kwd);
+ setTagKeyWords(tag_kwd);
setChecked(available_int !== 0);
}
if (!chunkId) {
setKeywords([]);
setQuestion([]);
+ setTagKeyWords([]);
form.setFieldsValue({ content: undefined });
}
}, [data, form, chunkId]);
@@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC & kFProps> = ({
content: values.content,
keywords, // keywords
question_kwd: question,
+ tag_kwd: tagKeyWords,
available_int: checked ? 1 : 0, // available_int
});
} catch (errorInfo) {
@@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC & kFProps> = ({
+ {isTagParser && (
+
+ {t('knowledgeConfiguration.tagName')}
+
+
+ )}
{chunkId && (
diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts b/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts
index e5debcb58..41271c36f 100644
--- a/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts
+++ b/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts
@@ -100,11 +100,13 @@ export const useUpdateChunk = () => {
keywords,
available_int,
question_kwd,
+ tag_kwd,
}: {
content: string;
keywords: string;
available_int: number;
question_kwd: string;
+ tag_kwd: string;
}) => {
const code = await createChunk({
content_with_weight: content,
@@ -113,6 +115,7 @@ export const useUpdateChunk = () => {
important_kwd: keywords, // keywords
available_int,
question_kwd,
+ tag_kwd,
});
if (code === 0) {
diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx b/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx
index 37db88da8..1f0a87db2 100644
--- a/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx
@@ -193,6 +193,7 @@ const Chunk = () => {
visible={chunkUpdatingVisible}
loading={chunkUpdatingLoading}
onOk={onChunkUpdatingOk}
+ parserId={documentInfo.parser_id}
/>
)}
diff --git a/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx b/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx
index 8c644aac0..f195df67c 100644
--- a/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx
@@ -79,7 +79,7 @@ const ParsingActionCell = ({
);
diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx b/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx
index 63d7c778f..1bd8282a3 100644
--- a/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx
+++ b/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx
@@ -1,12 +1,23 @@
import { useFetchTagList } from '@/hooks/knowledge-hooks';
import { Chart } from '@antv/g2';
-import { useCallback, useEffect, useRef } from 'react';
+import { sumBy } from 'lodash';
+import { useCallback, useEffect, useMemo, useRef } from 'react';
export function TagWordCloud() {
const domRef = useRef(null);
let chartRef = useRef();
const { list } = useFetchTagList();
+ const { list: tagList } = useMemo(() => {
+ // Top 256 tags by value, descending. Sort a copy: Array.prototype.sort mutates in place and `list` is returned by useFetchTagList.
+ const nextList = [...list].sort((a, b) => b[1] - a[1]).slice(0, 256);
+ return {
+ list: nextList.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
+ sumValue: sumBy(nextList, (x: [string, number]) => x[1]),
+ length: nextList.length, // sumValue and length are not destructured above; only `list` is used
+ };
+ }, [list]);
+
const renderWordCloud = useCallback(() => {
if (domRef.current) {
chartRef.current = new Chart({ container: domRef.current });
@@ -14,19 +25,30 @@ export function TagWordCloud() {
chartRef.current.options({
type: 'wordCloud',
autoFit: true,
- layout: { fontSize: [20, 100] },
+ layout: {
+ fontSize: [20, 100],
+ // fontSize: (d: any) => {
+ // if (d.value) {
+ // return (d.value / sumValue) * 100 * (length / 10);
+ // }
+ // return 0;
+ // },
+ },
data: {
type: 'inline',
- value: list.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
+ value: tagList,
},
encode: { color: 'text' },
legend: false,
- tooltip: false,
+ tooltip: {
+ title: 'name', // title
+ items: ['value'], // data item
+ },
});
chartRef.current.render();
}
- }, [list]);
+ }, [tagList]);
useEffect(() => {
renderWordCloud();