diff --git a/web/src/components/parse-configuration/index.tsx b/web/src/components/parse-configuration/index.tsx index 7786eddcb..b944b9712 100644 --- a/web/src/components/parse-configuration/index.tsx +++ b/web/src/components/parse-configuration/index.tsx @@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => { return !excludedParseMethods.includes(parserId); }; +export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag']; + +export const showTagItems = (parserId: string) => { + return !excludedTagParseMethods.includes(parserId); +}; + // The three types "table", "resume" and "one" do not display this configuration. const ParseConfiguration = () => { const form = Form.useFormInstance(); diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index a42daa827..d3f5b2685 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -325,6 +325,19 @@ The above is the content you need to summarize.`, searchTags: 'Search tags', tagCloud: 'Cloud', tagTable: 'Table', + tagSet: 'Tag set', + tagSetTip: ` +

Selecting the 'Tag' knowledge bases helps to tag every chunk.

+

Queries to those chunks will also be tagged.

+This procedure will improve precision of retrieval by adding more information to the dataset, especially when there's a large set of chunks. +

Difference between tags and keywords:

+ + `, + topnTags: 'Top-N Tags', }, chunk: { chunk: 'Chunk', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 26dcfa4b7..d1db43cf5 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -309,6 +309,19 @@ export default { searchTags: '搜尋標籤', tagCloud: '雲端', tagTable: '表', + tagSet: '標籤庫', + topnTags: 'Top-N 標籤', + tagSetTip: ` +

選擇「標籤」知識庫有助於標記每個區塊。

+

對這些區塊的查詢也將帶有標籤。 +此過程將透過向資料集添加更多資訊來提高檢索精度,特別是當存在大量區塊時。 +

標籤和關鍵字的差異:

+ + `, }, chunk: { chunk: '解析塊', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index b5f7d415e..ecc9ef140 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -326,6 +326,19 @@ export default { searchTags: '搜索标签', tagCloud: '云', tagTable: '表', + tagSet: '标签库', + topnTags: 'Top-N 标签', + tagSetTip: ` +

选择“标签”知识库有助于标记每个块。

+

对这些块的查询也将带有标签。

+ 此过程将通过向数据集添加更多信息来提高检索的准确性,尤其是在存在大量块的情况下。 +

标签和关键字之间的区别:

+ + `, }, chunk: { chunk: '解析块', diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx b/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx index 01a7b34da..cf1748c81 100644 --- a/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx @@ -13,6 +13,7 @@ type FieldType = { interface kFProps { doc_id: string; chunkId: string | undefined; + parserId: string; } const ChunkCreatingModal: React.FC & kFProps> = ({ @@ -21,15 +22,19 @@ const ChunkCreatingModal: React.FC & kFProps> = ({ hideModal, onOk, loading, + parserId, }) => { const [form] = Form.useForm(); const [checked, setChecked] = useState(false); const [keywords, setKeywords] = useState([]); const [question, setQuestion] = useState([]); + const [tagKeyWords, setTagKeyWords] = useState([]); const { removeChunk } = useDeleteChunkByIds(); const { data } = useFetchChunk(chunkId); const { t } = useTranslation(); + const isTagParser = parserId === 'tag'; + useEffect(() => { if (data?.code === 0) { const { @@ -37,16 +42,19 @@ const ChunkCreatingModal: React.FC & kFProps> = ({ important_kwd = [], available_int, question_kwd = [], + tag_kwd = [], } = data.data; form.setFieldsValue({ content: content_with_weight }); setKeywords(important_kwd); setQuestion(question_kwd); + setTagKeyWords(tag_kwd); setChecked(available_int !== 0); } if (!chunkId) { setKeywords([]); setQuestion([]); + setTagKeyWords([]); form.setFieldsValue({ content: undefined }); } }, [data, form, chunkId]); @@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC & kFProps> = ({ content: values.content, keywords, // keywords question_kwd: question, + tag_kwd: tagKeyWords, available_int: checked ? 
1 : 0, // available_int }); } catch (errorInfo) { @@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC & kFProps> = ({ + {isTagParser && ( +
+

{t('knowledgeConfiguration.tagName')}

+ +
+ )} {chunkId && (
diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts b/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts index e5debcb58..41271c36f 100644 --- a/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts +++ b/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts @@ -100,11 +100,13 @@ export const useUpdateChunk = () => { keywords, available_int, question_kwd, + tag_kwd, }: { content: string; keywords: string; available_int: number; question_kwd: string; + tag_kwd: string; }) => { const code = await createChunk({ content_with_weight: content, @@ -113,6 +115,7 @@ export const useUpdateChunk = () => { important_kwd: keywords, // keywords available_int, question_kwd, + tag_kwd, }); if (code === 0) { diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx b/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx index 37db88da8..1f0a87db2 100644 --- a/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-chunk/index.tsx @@ -193,6 +193,7 @@ const Chunk = () => { visible={chunkUpdatingVisible} loading={chunkUpdatingLoading} onOk={onChunkUpdatingOk} + parserId={documentInfo.parser_id} /> )} diff --git a/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx b/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx index 8c644aac0..f195df67c 100644 --- a/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-file/parsing-action-cell/index.tsx @@ -79,7 +79,7 @@ const ParsingActionCell = ({ ); diff --git a/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx b/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx index 63d7c778f..1bd8282a3 100644 --- a/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx +++ 
b/web/src/pages/add-knowledge/components/knowledge-setting/tag-word-cloud.tsx @@ -1,12 +1,23 @@ import { useFetchTagList } from '@/hooks/knowledge-hooks'; import { Chart } from '@antv/g2'; -import { useCallback, useEffect, useRef } from 'react'; +import { sumBy } from 'lodash'; +import { useCallback, useEffect, useMemo, useRef } from 'react'; export function TagWordCloud() { const domRef = useRef(null); let chartRef = useRef(); const { list } = useFetchTagList(); + const { list: tagList } = useMemo(() => { + const nextList = list.sort((a, b) => b[1] - a[1]).slice(0, 256); + + return { + list: nextList.map((x) => ({ text: x[0], value: x[1], name: x[0] })), + sumValue: sumBy(nextList, (x: [string, number]) => x[1]), + length: nextList.length, + }; + }, [list]); + const renderWordCloud = useCallback(() => { if (domRef.current) { chartRef.current = new Chart({ container: domRef.current }); @@ -14,19 +25,30 @@ export function TagWordCloud() { chartRef.current.options({ type: 'wordCloud', autoFit: true, - layout: { fontSize: [20, 100] }, + layout: { + fontSize: [20, 100], + // fontSize: (d: any) => { + // if (d.value) { + // return (d.value / sumValue) * 100 * (length / 10); + // } + // return 0; + // }, + }, data: { type: 'inline', - value: list.map((x) => ({ text: x[0], value: x[1], name: x[0] })), + value: tagList, }, encode: { color: 'text' }, legend: false, - tooltip: false, + tooltip: { + title: 'name', // title + items: ['value'], // data item + }, }); chartRef.current.render(); } - }, [list]); + }, [tagList]); useEffect(() => { renderWordCloud();