Feat: Add tag_kwd parameter to chunk configuration modal #4368 (#4414)

### What problem does this PR solve?

Feat: Add tag_kwd parameter to chunk configuration modal  #4368

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2025-01-08 19:45:34 +08:00 committed by GitHub
parent 3d66d78304
commit af43cb04e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 204 additions and 13 deletions

View File

@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
};
/** Chunk methods for which the tag-set form items are not displayed. */
export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];

/**
 * Tells whether the tag-set form items should be shown for a chunk method.
 *
 * @param parserId - Identifier of the selected chunk (parse) method.
 * @returns `false` when `parserId` is listed in `excludedTagParseMethods`,
 *   `true` otherwise.
 */
export const showTagItems = (parserId: string) =>
  excludedTagParseMethods.every((method) => method !== parserId);
// The three types "table", "resume" and "one" do not display this configuration.
const ParseConfiguration = () => {
const form = Form.useFormInstance();

View File

@ -325,6 +325,19 @@ The above is the content you need to summarize.`,
searchTags: 'Search tags',
tagCloud: 'Cloud',
tagTable: 'Table',
tagSet: 'Tag set',
tagSetTip: `
<p> Selecting the 'Tag' knowledge bases helps to tag every chunks. </p>
<p>Query to those chunks will also be with tags too.</p>
This procedure will improve precision of retrieval by adding more information to the dataset, especially when there's a large set of chunks.
<p>Difference between tags and keywords:</p>
<ul>
<li>Tag is a close set which is defined and manipulated by user while keyword is an open set.</li>
<li>You need to upload tag sets with samples prior to use.</li>
<li>Keywords are generated by LLM which is expensive and time consuming.</li>
</ul>
`,
topnTags: 'Top-N Tags',
},
chunk: {
chunk: 'Chunk',

View File

@ -309,6 +309,19 @@ export default {
searchTags: '搜尋標籤',
tagCloud: '雲端',
tagTable: '表',
tagSet: '標籤庫',
topnTags: 'Top-N 標籤',
tagSetTip: `
<p> </p>
<p>
<p></p>
<ul>
<li>使
<li>使
<li> LLM
</ul>
`,
},
chunk: {
chunk: '解析塊',

View File

@ -326,6 +326,19 @@ export default {
searchTags: '搜索标签',
tagCloud: '云',
tagTable: '表',
tagSet: '标签库',
topnTags: 'Top-N 标签',
tagSetTip: `
<p> </p>
<p> </p>
<p></p>
<ul>
<li> </li>
<li>使 </li>
<li> LLM </li>
</ul>
`,
},
chunk: {
chunk: '解析块',

View File

@ -13,6 +13,7 @@ type FieldType = {
interface kFProps {
doc_id: string;
chunkId: string | undefined;
parserId: string;
}
const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
@ -21,15 +22,19 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
hideModal,
onOk,
loading,
parserId,
}) => {
const [form] = Form.useForm();
const [checked, setChecked] = useState(false);
const [keywords, setKeywords] = useState<string[]>([]);
const [question, setQuestion] = useState<string[]>([]);
const [tagKeyWords, setTagKeyWords] = useState<string[]>([]);
const { removeChunk } = useDeleteChunkByIds();
const { data } = useFetchChunk(chunkId);
const { t } = useTranslation();
const isTagParser = parserId === 'tag';
useEffect(() => {
if (data?.code === 0) {
const {
@ -37,16 +42,19 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
important_kwd = [],
available_int,
question_kwd = [],
tag_kwd = [],
} = data.data;
form.setFieldsValue({ content: content_with_weight });
setKeywords(important_kwd);
setQuestion(question_kwd);
setTagKeyWords(tag_kwd);
setChecked(available_int !== 0);
}
if (!chunkId) {
setKeywords([]);
setQuestion([]);
setTagKeyWords([]);
form.setFieldsValue({ content: undefined });
}
}, [data, form, chunkId]);
@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
content: values.content,
keywords, // keywords
question_kwd: question,
tag_kwd: tagKeyWords,
available_int: checked ? 1 : 0, // available_int
});
} catch (errorInfo) {
@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
</div>
<EditTag tags={question} setTags={setQuestion} />
</section>
{isTagParser && (
<section className="mt-4">
<p className="mb-2">{t('knowledgeConfiguration.tagName')} </p>
<EditTag tags={tagKeyWords} setTags={setTagKeyWords} />
</section>
)}
{chunkId && (
<section>
<Divider></Divider>

View File

@ -100,11 +100,13 @@ export const useUpdateChunk = () => {
keywords,
available_int,
question_kwd,
tag_kwd,
}: {
content: string;
keywords: string;
available_int: number;
question_kwd: string;
tag_kwd: string;
}) => {
const code = await createChunk({
content_with_weight: content,
@ -113,6 +115,7 @@ export const useUpdateChunk = () => {
important_kwd: keywords, // keywords
available_int,
question_kwd,
tag_kwd,
});
if (code === 0) {

View File

@ -193,6 +193,7 @@ const Chunk = () => {
visible={chunkUpdatingVisible}
loading={chunkUpdatingLoading}
onOk={onChunkUpdatingOk}
parserId={documentInfo.parser_id}
/>
)}
<KnowledgeGraphModal></KnowledgeGraphModal>

View File

@ -79,7 +79,7 @@ const ParsingActionCell = ({
<Dropdown
menu={{ items: chunkItems }}
trigger={['click']}
disabled={isRunning}
disabled={isRunning || record.parser_id === 'tag'}
>
<Button type="text" className={styles.iconButton}>
<ToolOutlined size={20} />

View File

@ -9,7 +9,7 @@ import styles from './index.less';
import { TagTabs } from './tag-tabs';
import { ImageMap } from './utils';
const { Title, Text } = Typography;
const { Text } = Typography;
const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
const parserList = useSelectParserList();
@ -37,15 +37,15 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
<section className={styles.categoryPanelWrapper}>
{imageList.length > 0 ? (
<>
<Title level={5} className={styles.topTitle}>
<h5 className="font-semibold text-base mt-0 mb-1">
{`"${item.title}" ${t('methodTitle')}`}
</Title>
</h5>
<p
dangerouslySetInnerHTML={{
__html: DOMPurify.sanitize(item.description),
}}
></p>
<Title level={5}>{`"${item.title}" ${t('methodExamples')}`}</Title>
<h5 className="font-semibold text-base mt-4 mb-1">{`"${item.title}" ${t('methodExamples')}`}</h5>
<Text>{t('methodExamplesDescription')}</Text>
<Row gutter={[10, 10]} className={styles.imageRow}>
{imageList.map((x) => (
@ -58,9 +58,9 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
</Col>
))}
</Row>
<Title level={5}>
<h5 className="font-semibold text-base mt-4 mb-1">
{item.title} {t('dialogueExamplesTitle')}
</Title>
</h5>
<Divider></Divider>
</>
) : (

View File

@ -11,6 +11,7 @@ import MaxTokenNumber from '@/components/max-token-number';
import PageRank from '@/components/page-rank';
import ParseConfiguration, {
showRaptorParseConfiguration,
showTagItems,
} from '@/components/parse-configuration';
import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
@ -23,6 +24,7 @@ import {
useSubmitKnowledgeConfiguration,
} from './hooks';
import styles from './index.less';
import { TagItems } from './tag-item';
const { Option } = Select;
@ -146,6 +148,8 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
{showRaptorParseConfiguration(parserId) && (
<ParseConfiguration></ParseConfiguration>
)}
{showTagItems(parserId) && <TagItems></TagItems>}
</>
);
}}

View File

@ -0,0 +1,101 @@
import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
import { UserOutlined } from '@ant-design/icons';
import {
Avatar,
Divider,
Flex,
Form,
InputNumber,
Select,
Slider,
Space,
} from 'antd';
import DOMPurify from 'dompurify';
import { useTranslation } from 'react-i18next';
/**
 * Form item for choosing the knowledge bases that serve as tag sets.
 *
 * Only knowledge bases whose `parser_id` is `'tag'` are offered as options.
 * The selected ids are stored under the `parser_config.tag_kb_ids` field.
 */
export const TagSetItem = () => {
  const { t } = useTranslation();
  const { list: knowledgeList } = useFetchKnowledgeList(true);

  const tagKnowledgeBases = knowledgeList.filter(
    (kb) => kb.parser_id === 'tag',
  );
  const knowledgeOptions = tagKnowledgeBases.map((kb) => {
    const label = (
      <Space>
        <Avatar size={20} icon={<UserOutlined />} src={kb.avatar} />
        {kb.name}
      </Space>
    );
    return { label, value: kb.id };
  });

  // The tooltip text is translated HTML, so it is sanitized before injection.
  const tipHtml = DOMPurify.sanitize(t('knowledgeConfiguration.tagSetTip'));
  const selectHint = t('chat.knowledgeBasesMessage');

  return (
    <Form.Item
      label={t('knowledgeConfiguration.tagSet')}
      name={['parser_config', 'tag_kb_ids']}
      tooltip={<div dangerouslySetInnerHTML={{ __html: tipHtml }}></div>}
      rules={[{ message: selectHint, type: 'array' }]}
    >
      <Select
        mode="multiple"
        options={knowledgeOptions}
        placeholder={selectHint}
      ></Select>
    </Form.Item>
  );
};
/**
 * Form item for the "Top-N tags" setting.
 *
 * A slider and a numeric input are both registered under the
 * `parser_config.topn_tags` field path, limited to the range 1–10.
 * The slider's form item supplies the initial value of 3.
 */
export const TopNTagsItem = () => {
  const { t } = useTranslation();

  const fieldPath = ['parser_config', 'topn_tags'];
  const bounds = { min: 1, max: 10 };

  return (
    <Form.Item label={t('knowledgeConfiguration.topnTags')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item name={fieldPath} noStyle initialValue={3}>
            <Slider {...bounds} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={fieldPath} noStyle>
          <InputNumber {...bounds} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};
/**
 * Groups the tag-related form items between two dividers.
 *
 * The tag-set selector is always rendered. The Top-N control is rendered only
 * while `parser_config.tag_kb_ids` holds a non-empty array.
 */
export function TagItems() {
  const tagSetPath = ['parser_config', 'tag_kb_ids'];

  return (
    <>
      <Divider />
      <TagSetItem></TagSetItem>
      <Form.Item noStyle dependencies={[tagSetPath]}>
        {({ getFieldValue }) => {
          const selected: unknown = getFieldValue(tagSetPath);
          // Nothing to configure until at least one tag set is chosen.
          if (!Array.isArray(selected) || selected.length === 0) {
            return false;
          }
          return <TopNTagsItem></TopNTagsItem>;
        }}
      </Form.Item>
      <Divider />
    </>
  );
}

View File

@ -108,7 +108,7 @@ export function TagTable() {
variant="ghost"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
>
{t('knowledgeConfiguration.tag')}
{t('knowledgeConfiguration.tagName')}
<ArrowUpDown />
</Button>
);

View File

@ -1,12 +1,23 @@
import { useFetchTagList } from '@/hooks/knowledge-hooks';
import { Chart } from '@antv/g2';
import { useCallback, useEffect, useRef } from 'react';
import { sumBy } from 'lodash';
import { useCallback, useEffect, useMemo, useRef } from 'react';
export function TagWordCloud() {
const domRef = useRef<HTMLDivElement>(null);
let chartRef = useRef<Chart>();
const { list } = useFetchTagList();
const { list: tagList } = useMemo(() => {
const nextList = list.sort((a, b) => b[1] - a[1]).slice(0, 256);
return {
list: nextList.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
sumValue: sumBy(nextList, (x: [string, number]) => x[1]),
length: nextList.length,
};
}, [list]);
const renderWordCloud = useCallback(() => {
if (domRef.current) {
chartRef.current = new Chart({ container: domRef.current });
@ -14,19 +25,30 @@ export function TagWordCloud() {
chartRef.current.options({
type: 'wordCloud',
autoFit: true,
layout: { fontSize: [20, 100] },
layout: {
fontSize: [20, 100],
// fontSize: (d: any) => {
// if (d.value) {
// return (d.value / sumValue) * 100 * (length / 10);
// }
// return 0;
// },
},
data: {
type: 'inline',
value: list.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
value: tagList,
},
encode: { color: 'text' },
legend: false,
tooltip: false,
tooltip: {
title: 'name', // title
items: ['value'], // data item
},
});
chartRef.current.render();
}
}, [list]);
}, [tagList]);
useEffect(() => {
renderWordCloud();