mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 13:19:00 +08:00
### What problem does this PR solve? Feat: Add tag_kwd parameter to chunk configuration modal #4368 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
3d66d78304
commit
af43cb04e8
@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => {
|
||||
return !excludedParseMethods.includes(parserId);
|
||||
};
|
||||
|
||||
/** Parser ids for which the tag items are not displayed. */
export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];

/**
 * Tells whether the tag items should be displayed for a parse method.
 *
 * @param parserId - Identifier of the selected parse method.
 * @returns `false` when `parserId` is listed in `excludedTagParseMethods`,
 *   `true` otherwise.
 */
export const showTagItems = (parserId: string) => {
  const isExcluded = excludedTagParseMethods.some(
    (method) => method === parserId,
  );

  return !isExcluded;
};
|
||||
|
||||
// The three types "table", "resume" and "one" do not display this configuration.
|
||||
const ParseConfiguration = () => {
|
||||
const form = Form.useFormInstance();
|
||||
|
@ -325,6 +325,19 @@ The above is the content you need to summarize.`,
|
||||
searchTags: 'Search tags',
|
||||
tagCloud: 'Cloud',
|
||||
tagTable: 'Table',
|
||||
tagSet: 'Tag set',
|
||||
tagSetTip: `
|
||||
<p> Selecting the 'Tag' knowledge bases helps to tag every chunks. </p>
|
||||
<p>Query to those chunks will also be with tags too.</p>
|
||||
This procedure will improve precision of retrieval by adding more information to the dataset, especially when there's a large set of chunks.
|
||||
<p>Difference between tags and keywords:</p>
|
||||
<ul>
|
||||
<li>Tag is a close set which is defined and manipulated by user while keyword is an open set.</li>
|
||||
<li>You need to upload tag sets with samples prior to use.</li>
|
||||
<li>Keywords are generated by LLM which is expensive and time consuming.</li>
|
||||
</ul>
|
||||
`,
|
||||
topnTags: 'Top-N Tags',
|
||||
},
|
||||
chunk: {
|
||||
chunk: 'Chunk',
|
||||
|
@ -309,6 +309,19 @@ export default {
|
||||
searchTags: '搜尋標籤',
|
||||
tagCloud: '雲端',
|
||||
tagTable: '表',
|
||||
tagSet: '標籤庫',
|
||||
topnTags: 'Top-N 標籤',
|
||||
tagSetTip: `
|
||||
<p> 選擇「標籤」知識庫有助於標記每個區塊。 </p>
|
||||
<p>對這些區塊的查詢也將帶有標籤。
|
||||
此過程將透過向資料集添加更多資訊來提高檢索精度,特別是當存在大量區塊時。
|
||||
<p>標籤和關鍵字的差異:</p>
|
||||
<ul>
|
||||
<li>標籤是一個閉集,由使用者定義和操作,而關鍵字是一個開集。
|
||||
<li>您需要在使用前上傳包含範例的標籤集。
|
||||
<li>關鍵字由 LLM 生成,既昂貴又耗時。
|
||||
</ul>
|
||||
`,
|
||||
},
|
||||
chunk: {
|
||||
chunk: '解析塊',
|
||||
|
@ -326,6 +326,19 @@ export default {
|
||||
searchTags: '搜索标签',
|
||||
tagCloud: '云',
|
||||
tagTable: '表',
|
||||
tagSet: '标签库',
|
||||
topnTags: 'Top-N 标签',
|
||||
tagSetTip: `
|
||||
<p> 选择“标签”知识库有助于标记每个块。 </p>
|
||||
<p>对这些块的查询也将带有标签。 </p>
|
||||
此过程将通过向数据集添加更多信息来提高检索的准确性,尤其是在存在大量块的情况下。
|
||||
<p>标签和关键字之间的区别:</p>
|
||||
<ul>
|
||||
<li>标签是一个由用户定义和操作的封闭集,而关键字是一个开放集。 </li>
|
||||
<li>您需要在使用前上传带有样本的标签集。 </li>
|
||||
<li>关键字由 LLM 生成,这既昂贵又耗时。 </li>
|
||||
</ul>
|
||||
`,
|
||||
},
|
||||
chunk: {
|
||||
chunk: '解析块',
|
||||
|
@ -13,6 +13,7 @@ type FieldType = {
|
||||
/**
 * Props of the chunk creating/editing modal that come on top of the generic
 * modal props.
 */
interface kFProps {
  /** Presumably the id of the document the chunk belongs to; confirm against the caller. */
  doc_id: string;
  /**
   * Id of the chunk to load into the modal. When it is falsy the modal clears
   * its content, keyword, question and tag fields.
   */
  chunkId: string | undefined;
  /** Parser id of the document; the tag editor is rendered only when it equals `'tag'`. */
  parserId: string;
}
|
||||
|
||||
const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
|
||||
@ -21,15 +22,19 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
|
||||
hideModal,
|
||||
onOk,
|
||||
loading,
|
||||
parserId,
|
||||
}) => {
|
||||
const [form] = Form.useForm();
|
||||
const [checked, setChecked] = useState(false);
|
||||
const [keywords, setKeywords] = useState<string[]>([]);
|
||||
const [question, setQuestion] = useState<string[]>([]);
|
||||
const [tagKeyWords, setTagKeyWords] = useState<string[]>([]);
|
||||
const { removeChunk } = useDeleteChunkByIds();
|
||||
const { data } = useFetchChunk(chunkId);
|
||||
const { t } = useTranslation();
|
||||
|
||||
const isTagParser = parserId === 'tag';
|
||||
|
||||
useEffect(() => {
|
||||
if (data?.code === 0) {
|
||||
const {
|
||||
@ -37,16 +42,19 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
|
||||
important_kwd = [],
|
||||
available_int,
|
||||
question_kwd = [],
|
||||
tag_kwd = [],
|
||||
} = data.data;
|
||||
form.setFieldsValue({ content: content_with_weight });
|
||||
setKeywords(important_kwd);
|
||||
setQuestion(question_kwd);
|
||||
setTagKeyWords(tag_kwd);
|
||||
setChecked(available_int !== 0);
|
||||
}
|
||||
|
||||
if (!chunkId) {
|
||||
setKeywords([]);
|
||||
setQuestion([]);
|
||||
setTagKeyWords([]);
|
||||
form.setFieldsValue({ content: undefined });
|
||||
}
|
||||
}, [data, form, chunkId]);
|
||||
@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
|
||||
content: values.content,
|
||||
keywords, // keywords
|
||||
question_kwd: question,
|
||||
tag_kwd: tagKeyWords,
|
||||
available_int: checked ? 1 : 0, // available_int
|
||||
});
|
||||
} catch (errorInfo) {
|
||||
@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
|
||||
</div>
|
||||
<EditTag tags={question} setTags={setQuestion} />
|
||||
</section>
|
||||
{isTagParser && (
|
||||
<section className="mt-4">
|
||||
<p className="mb-2">{t('knowledgeConfiguration.tagName')} </p>
|
||||
<EditTag tags={tagKeyWords} setTags={setTagKeyWords} />
|
||||
</section>
|
||||
)}
|
||||
{chunkId && (
|
||||
<section>
|
||||
<Divider></Divider>
|
||||
|
@ -100,11 +100,13 @@ export const useUpdateChunk = () => {
|
||||
keywords,
|
||||
available_int,
|
||||
question_kwd,
|
||||
tag_kwd,
|
||||
}: {
|
||||
content: string;
|
||||
keywords: string;
|
||||
available_int: number;
|
||||
question_kwd: string;
|
||||
tag_kwd: string;
|
||||
}) => {
|
||||
const code = await createChunk({
|
||||
content_with_weight: content,
|
||||
@ -113,6 +115,7 @@ export const useUpdateChunk = () => {
|
||||
important_kwd: keywords, // keywords
|
||||
available_int,
|
||||
question_kwd,
|
||||
tag_kwd,
|
||||
});
|
||||
|
||||
if (code === 0) {
|
||||
|
@ -193,6 +193,7 @@ const Chunk = () => {
|
||||
visible={chunkUpdatingVisible}
|
||||
loading={chunkUpdatingLoading}
|
||||
onOk={onChunkUpdatingOk}
|
||||
parserId={documentInfo.parser_id}
|
||||
/>
|
||||
)}
|
||||
<KnowledgeGraphModal></KnowledgeGraphModal>
|
||||
|
@ -79,7 +79,7 @@ const ParsingActionCell = ({
|
||||
<Dropdown
|
||||
menu={{ items: chunkItems }}
|
||||
trigger={['click']}
|
||||
disabled={isRunning}
|
||||
disabled={isRunning || record.parser_id === 'tag'}
|
||||
>
|
||||
<Button type="text" className={styles.iconButton}>
|
||||
<ToolOutlined size={20} />
|
||||
|
@ -9,7 +9,7 @@ import styles from './index.less';
|
||||
import { TagTabs } from './tag-tabs';
|
||||
import { ImageMap } from './utils';
|
||||
|
||||
const { Title, Text } = Typography;
|
||||
const { Text } = Typography;
|
||||
|
||||
const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
|
||||
const parserList = useSelectParserList();
|
||||
@ -37,15 +37,15 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
|
||||
<section className={styles.categoryPanelWrapper}>
|
||||
{imageList.length > 0 ? (
|
||||
<>
|
||||
<Title level={5} className={styles.topTitle}>
|
||||
<h5 className="font-semibold text-base mt-0 mb-1">
|
||||
{`"${item.title}" ${t('methodTitle')}`}
|
||||
</Title>
|
||||
</h5>
|
||||
<p
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: DOMPurify.sanitize(item.description),
|
||||
}}
|
||||
></p>
|
||||
<Title level={5}>{`"${item.title}" ${t('methodExamples')}`}</Title>
|
||||
<h5 className="font-semibold text-base mt-4 mb-1">{`"${item.title}" ${t('methodExamples')}`}</h5>
|
||||
<Text>{t('methodExamplesDescription')}</Text>
|
||||
<Row gutter={[10, 10]} className={styles.imageRow}>
|
||||
{imageList.map((x) => (
|
||||
@ -58,9 +58,9 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
|
||||
</Col>
|
||||
))}
|
||||
</Row>
|
||||
<Title level={5}>
|
||||
<h5 className="font-semibold text-base mt-4 mb-1">
|
||||
{item.title} {t('dialogueExamplesTitle')}
|
||||
</Title>
|
||||
</h5>
|
||||
<Divider></Divider>
|
||||
</>
|
||||
) : (
|
||||
|
@ -11,6 +11,7 @@ import MaxTokenNumber from '@/components/max-token-number';
|
||||
import PageRank from '@/components/page-rank';
|
||||
import ParseConfiguration, {
|
||||
showRaptorParseConfiguration,
|
||||
showTagItems,
|
||||
} from '@/components/parse-configuration';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
|
||||
@ -23,6 +24,7 @@ import {
|
||||
useSubmitKnowledgeConfiguration,
|
||||
} from './hooks';
|
||||
import styles from './index.less';
|
||||
import { TagItems } from './tag-item';
|
||||
|
||||
const { Option } = Select;
|
||||
|
||||
@ -146,6 +148,8 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
|
||||
{showRaptorParseConfiguration(parserId) && (
|
||||
<ParseConfiguration></ParseConfiguration>
|
||||
)}
|
||||
|
||||
{showTagItems(parserId) && <TagItems></TagItems>}
|
||||
</>
|
||||
);
|
||||
}}
|
||||
|
@ -0,0 +1,101 @@
|
||||
import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
|
||||
import { UserOutlined } from '@ant-design/icons';
|
||||
import {
|
||||
Avatar,
|
||||
Divider,
|
||||
Flex,
|
||||
Form,
|
||||
InputNumber,
|
||||
Select,
|
||||
Slider,
|
||||
Space,
|
||||
} from 'antd';
|
||||
import DOMPurify from 'dompurify';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
/**
 * Form item for picking the knowledge bases used as tag sets.
 *
 * The selection is stored under `parser_config.tag_kb_ids`. Only knowledge
 * bases whose `parser_id` is `'tag'` are offered as options.
 */
export const TagSetItem = () => {
  const { t } = useTranslation();
  const { list: knowledgeList } = useFetchKnowledgeList(true);

  const tagKnowledgeBases = knowledgeList.filter(
    (kb) => kb.parser_id === 'tag',
  );
  const knowledgeOptions = tagKnowledgeBases.map((kb) => ({
    label: (
      <Space>
        <Avatar size={20} icon={<UserOutlined />} src={kb.avatar} />
        {kb.name}
      </Space>
    ),
    value: kb.id,
  }));

  // The tip is an HTML snippet taken from the locale files, so it is
  // sanitized before being injected into the tooltip.
  const tooltipContent = (
    <div
      dangerouslySetInnerHTML={{
        __html: DOMPurify.sanitize(t('knowledgeConfiguration.tagSetTip')),
      }}
    ></div>
  );

  // The same translated text serves as validation message and placeholder.
  const selectionMessage = t('chat.knowledgeBasesMessage');

  return (
    <Form.Item
      label={t('knowledgeConfiguration.tagSet')}
      name={['parser_config', 'tag_kb_ids']}
      tooltip={tooltipContent}
      rules={[{ message: selectionMessage, type: 'array' }]}
    >
      <Select
        mode="multiple"
        options={knowledgeOptions}
        placeholder={selectionMessage}
      />
    </Form.Item>
  );
};
|
||||
|
||||
/**
 * Form item for `parser_config.topn_tags`.
 *
 * A slider and a number input are both bound to the same field, each limited
 * to the range 1–10; the slider's form item declares the initial value 3.
 */
export const TopNTagsItem = () => {
  const { t } = useTranslation();

  const minTags = 1;
  const maxTags = 10;

  const sliderField = (
    <Form.Item name={['parser_config', 'topn_tags']} noStyle initialValue={3}>
      <Slider max={maxTags} min={minTags} style={{ width: '100%' }} />
    </Form.Item>
  );

  const numberField = (
    <Form.Item name={['parser_config', 'topn_tags']} noStyle>
      <InputNumber max={maxTags} min={minTags} />
    </Form.Item>
  );

  return (
    <Form.Item label={t('knowledgeConfiguration.topnTags')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>{sliderField}</Flex>
        {numberField}
      </Flex>
    </Form.Item>
  );
};
|
||||
|
||||
/**
 * Tag section of the configuration form: the tag set selector, followed by
 * the Top-N tags control once at least one tag set has been selected, framed
 * by two dividers.
 */
export function TagItems() {
  return (
    <>
      <Divider />
      <TagSetItem />
      {/* Re-evaluated whenever parser_config.tag_kb_ids changes. */}
      <Form.Item noStyle dependencies={[['parser_config', 'tag_kb_ids']]}>
        {({ getFieldValue }) => {
          const selectedIds: string[] = getFieldValue([
            'parser_config',
            'tag_kb_ids',
          ]);
          const hasTagSets =
            Array.isArray(selectedIds) && selectedIds.length > 0;

          return hasTagSets && <TopNTagsItem />;
        }}
      </Form.Item>
      <Divider />
    </>
  );
}
|
@ -108,7 +108,7 @@ export function TagTable() {
|
||||
variant="ghost"
|
||||
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
|
||||
>
|
||||
{t('knowledgeConfiguration.tag')}
|
||||
{t('knowledgeConfiguration.tagName')}
|
||||
<ArrowUpDown />
|
||||
</Button>
|
||||
);
|
||||
|
@ -1,12 +1,23 @@
|
||||
import { useFetchTagList } from '@/hooks/knowledge-hooks';
|
||||
import { Chart } from '@antv/g2';
|
||||
import { useCallback, useEffect, useRef } from 'react';
|
||||
import { sumBy } from 'lodash';
|
||||
import { useCallback, useEffect, useMemo, useRef } from 'react';
|
||||
|
||||
export function TagWordCloud() {
|
||||
const domRef = useRef<HTMLDivElement>(null);
|
||||
let chartRef = useRef<Chart>();
|
||||
const { list } = useFetchTagList();
|
||||
|
||||
// Keep the 256 tags with the highest counts, in descending order of count.
// Array.prototype.sort sorts in place, so a shallow copy is sorted instead:
// `list` is the array handed out by useFetchTagList and must not be reordered
// as a side effect of rendering.
const { list: tagList } = useMemo(() => {
  const nextList = [...list].sort((a, b) => b[1] - a[1]).slice(0, 256);

  return {
    // `name` and `value` are referenced by the chart's tooltip options and
    // `text` by its colour encoding.
    list: nextList.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
    // `sumValue` and `length` are only referenced by the commented-out
    // fontSize callback in the chart options.
    sumValue: sumBy(nextList, (x: [string, number]) => x[1]),
    length: nextList.length,
  };
}, [list]);
|
||||
|
||||
const renderWordCloud = useCallback(() => {
|
||||
if (domRef.current) {
|
||||
chartRef.current = new Chart({ container: domRef.current });
|
||||
@ -14,19 +25,30 @@ export function TagWordCloud() {
|
||||
chartRef.current.options({
|
||||
type: 'wordCloud',
|
||||
autoFit: true,
|
||||
layout: { fontSize: [20, 100] },
|
||||
layout: {
|
||||
fontSize: [20, 100],
|
||||
// fontSize: (d: any) => {
|
||||
// if (d.value) {
|
||||
// return (d.value / sumValue) * 100 * (length / 10);
|
||||
// }
|
||||
// return 0;
|
||||
// },
|
||||
},
|
||||
data: {
|
||||
type: 'inline',
|
||||
value: list.map((x) => ({ text: x[0], value: x[1], name: x[0] })),
|
||||
value: tagList,
|
||||
},
|
||||
encode: { color: 'text' },
|
||||
legend: false,
|
||||
tooltip: false,
|
||||
tooltip: {
|
||||
title: 'name', // title
|
||||
items: ['value'], // data item
|
||||
},
|
||||
});
|
||||
|
||||
chartRef.current.render();
|
||||
}
|
||||
}, [list]);
|
||||
}, [tagList]);
|
||||
|
||||
useEffect(() => {
|
||||
renderWordCloud();
|
||||
|
Loading…
x
Reference in New Issue
Block a user