Feat: Metadata in documents to improve the prompt #3690 (#4462)

### What problem does this PR solve?

Feat: Metadata in documents to improve the prompt #3690

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2025-01-13 17:13:37 +08:00 committed by GitHub
parent 46c52d65b7
commit d8346cb7a6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 270 additions and 13 deletions

View File

@ -1,7 +1,10 @@
import { IReferenceChunk } from '@/interfaces/database/chat';
import { IDocumentInfo } from '@/interfaces/database/document';
import { IChunk } from '@/interfaces/database/knowledge';
import { IChangeParserConfigRequestBody } from '@/interfaces/request/document';
import {
IChangeParserConfigRequestBody,
IDocumentMetaRequestBody,
} from '@/interfaces/request/document';
import i18n from '@/locales/config';
import chatService from '@/services/chat-service';
import kbService from '@/services/knowledge-service';
@ -396,7 +399,6 @@ export const useRemoveNextDocument = () => {
};
export const useDeleteDocument = () => {
// const queryClient = useQueryClient();
const {
data,
isPending: loading,
@ -405,9 +407,7 @@ export const useDeleteDocument = () => {
mutationKey: ['deleteDocument'],
mutationFn: async (documentIds: string[]) => {
const data = await kbService.document_delete({ doc_ids: documentIds });
// if (data.code === 0) {
// queryClient.invalidateQueries({ queryKey: ['fetchFlowList'] });
// }
return data;
},
});
@ -441,9 +441,7 @@ export const useUploadAndParseDocument = (uploadMethod: string) => {
}
const data = await chatService.uploadAndParseExternal(formData);
return data?.data;
} catch (error) {
console.log('🚀 ~ useUploadAndParseDocument ~ error:', error);
}
} catch (error) {}
},
});
@ -465,7 +463,6 @@ export const useParseDocument = () => {
}
return data;
} catch (error) {
console.log('🚀 ~ mutationFn: ~ error:', error);
message.error('error');
}
},
@ -473,3 +470,34 @@ export const useParseDocument = () => {
return { parseDocument: mutateAsync, data, loading };
};
/**
 * Mutation hook that saves the metadata of a single document.
 *
 * The returned `setDocumentMeta` sends `meta` and the document id (as
 * `doc_id`) through `kbService.setMeta`. When the response code is `0` the
 * `fetchDocumentList` query is invalidated and a success toast is shown.
 * The promise resolves with the response code, or with `undefined` when the
 * request throws (a generic error toast is shown in that case).
 */
export const useSetDocumentMeta = () => {
  const queryClient = useQueryClient();

  const saveMeta = async ({ documentId, meta }: IDocumentMetaRequestBody) => {
    try {
      const response = await kbService.setMeta({ meta, doc_id: documentId });
      const payload = response.data;
      const succeeded = payload?.code === 0;

      if (succeeded) {
        // Refresh the document table so the new metadata becomes visible.
        queryClient.invalidateQueries({ queryKey: ['fetchDocumentList'] });
        message.success(i18n.t('message.modified'));
      }

      return payload?.code;
    } catch (error) {
      message.error('error');
    }
  };

  const { data, isPending, mutateAsync } = useMutation({
    mutationKey: ['setDocumentMeta'],
    mutationFn: saveMeta,
  });

  return { setDocumentMeta: mutateAsync, data, loading: isPending };
};

View File

@ -24,6 +24,7 @@ export interface IDocumentInfo {
type: string;
update_date: string;
update_time: number;
meta_fields?: Record<string, any>;
}
export interface IParserConfig {

View File

@ -10,3 +10,8 @@ export interface IChangeParserRequestBody {
doc_id: string;
parser_config: IChangeParserConfigRequestBody;
}
/**
 * Parameters accepted by the "set document metadata" mutation.
 */
export interface IDocumentMetaRequestBody {
// Id of the document whose metadata is being updated.
documentId: string;
// Metadata serialized as a JSON string (not a parsed object).
meta: string; // json format string
}

View File

@ -169,6 +169,28 @@ export default {
autoQuestions: 'Auto-question',
autoQuestionsTip: `Automatically extract N questions for each chunk to increase their ranking for queries containing those questions. You can check or update the added questions for a chunk from the chunk list. This feature will not disrupt the chunking process if an error occurs, except that it may add an empty result to the original chunk. Be aware that extra tokens will be consumed by the LLM specified in 'System model settings'.`,
redo: 'Do you want to clear the existing {{chunkNum}} chunks?',
setMetaData: 'Set Meta Data',
pleaseInputJson: 'Please enter JSON',
documentMetaTips: `<p>The meta data is in Json format(it's not searchable). It will be added into prompt for LLM if any chunks of this document are included in the prompt.</p>
<p>Examples:</p>
<b>The meta data is:</b><br>
<code>
{
"Author": "Alex Dowson",
"Date": "2024-11-12"
}
</code><br>
<b>The prompt will be:</b><br>
<p>Document: the_name_of_document</p>
<p>Author: Alex Dowson</p>
<p>Date: 2024-11-12</p>
<p>Relevant fragments as following:</p>
<ul>
<li> Here is the chunk content....</li>
<li> Here is the chunk content....</li>
</ul>
`,
metaData: 'Meta data',
},
knowledgeConfiguration: {
titleDescription:

View File

@ -165,6 +165,27 @@ export default {
autoQuestions: '自動問題',
autoQuestionsTip: `在查詢此類問題時,為每個區塊提取 N 個問題以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。如果發生錯誤此功能不會破壞整個分塊過程除了將空結果新增至原始區塊。 `,
redo: '是否清空已有 {{chunkNum}}個 chunk',
setMetaData: '設定元數據',
pleaseInputJson: '請輸入JSON',
// HTML help text for the metadata editor tooltip; mirrors the English entry.
// Example keys ("Author", "Date") are kept verbatim so the sample JSON and the
// sample prompt lines agree with each other.
documentMetaTips: `<p>元資料為 Json 格式(不可搜尋)。如果提示中包含該文件的任何部分,它將被添加到 LLM 提示中。</p>
<p>範例：</p>
<b>元資料為：</b><br>
<code>
{
"Author": "Alex Dowson",
"Date": "2024-11-12"
}
</code><br>
<b>提示將為：</b><br>
<p>文件：the_name_of_document</p>
<p>Author: Alex Dowson</p>
<p>Date: 2024-11-12</p>
<p>相關片段如下：</p>
<ul>
<li> 這是區塊內容....</li>
<li> 這是區塊內容....</li>
</ul>
`,
// Label of the metadata form field (was missing from this locale).
metaData: '元資料',
},
knowledgeConfiguration: {
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',

View File

@ -166,6 +166,28 @@ export default {
autoQuestions: '自动问题',
autoQuestionsTip: `在查询此类问题时,为每个块提取 N 个问题以提高其排名得分。在“系统模型设置”中设置的 LLM 将消耗额外的 token。您可以在块列表中查看结果。如果发生错误此功能不会破坏整个分块过程除了将空结果添加到原始块。`,
redo: '是否清空已有 {{chunkNum}}个 chunk',
setMetaData: '设置元数据',
pleaseInputJson: '请输入JSON',
// HTML help text for the metadata editor tooltip; mirrors the English entry.
// Example keys ("Author", "Date") are kept verbatim so the sample JSON is
// valid and matches the sample prompt lines.
documentMetaTips: `<p>元数据为 Json 格式(不可搜索)。如果提示中包含此文档的任何块,它将被添加到 LLM 的提示中。</p>
<p>示例：</p>
<b>元数据为：</b><br>
<code>
{
"Author": "Alex Dowson",
"Date": "2024-11-12"
}
</code><br>
<b>提示将为：</b><br>
<p>文档：the_name_of_document</p>
<p>Author: Alex Dowson</p>
<p>Date: 2024-11-12</p>
<p>相关片段如下：</p>
<ul>
<li> 这是块内容....</li>
<li> 这是块内容....</li>
</ul>
`,
// Label of the metadata form field, in Simplified Chinese.
metaData: '元数据',
},
knowledgeConfiguration: {
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',

View File

@ -4,6 +4,7 @@ import {
useNextWebCrawl,
useRunNextDocument,
useSaveNextDocumentName,
useSetDocumentMeta,
useSetNextDocumentParser,
useUploadNextDocument,
} from '@/hooks/document-hooks';
@ -236,3 +237,34 @@ export const useHandleRunDocumentByIds = (id: string) => {
loading: isLoading,
};
};
/**
 * Wires the "set metadata" modal to the metadata mutation for one document.
 *
 * @param documentId - Id of the document whose metadata will be saved.
 * @returns Modal visibility state, show/hide callbacks, the mutation's
 *   loading flag, and an OK handler that saves the given JSON string and
 *   closes the modal only when the save resolves with code `0`.
 */
export const useShowMetaModal = (documentId: string) => {
  const modal = useSetModalState();
  const setMetaVisible = modal.visible;
  const hideSetMetaModal = modal.hideModal;
  const showSetMetaModal = modal.showModal;

  const { setDocumentMeta, loading: setMetaLoading } = useSetDocumentMeta();

  const onSetMetaModalOk = useCallback(
    async (meta: string) => {
      const code = await setDocumentMeta({ documentId, meta });
      if (code !== 0) {
        // Keep the modal open so the user can retry or correct the input.
        return;
      }
      hideSetMetaModal();
    },
    [setDocumentMeta, documentId, hideSetMetaModal],
  );

  return {
    setMetaLoading,
    onSetMetaModalOk,
    setMetaVisible,
    hideSetMetaModal,
    showSetMetaModal,
  };
};

View File

@ -20,6 +20,7 @@ import {
useHandleWebCrawl,
useNavigateToOtherPage,
useRenameDocument,
useShowMetaModal,
} from './hooks';
import ParsingActionCell from './parsing-action-cell';
import ParsingStatusCell from './parsing-status-cell';
@ -30,6 +31,7 @@ import FileUploadModal from '@/components/file-upload-modal';
import { IDocumentInfo } from '@/interfaces/database/document';
import { formatDate } from '@/utils/date';
import styles from './index.less';
import { SetMetaModal } from './set-meta-modal';
const { Text } = Typography;
@ -79,6 +81,14 @@ const KnowledgeFile = () => {
keyPrefix: 'knowledgeDetails',
});
const {
showSetMetaModal,
hideSetMetaModal,
setMetaVisible,
setMetaLoading,
onSetMetaModalOk,
} = useShowMetaModal(currentRecord.id);
const rowSelection = useGetRowSelection();
const columns: ColumnsType<IDocumentInfo> = [
@ -157,6 +167,7 @@ const KnowledgeFile = () => {
setCurrentRecord={setRecord}
showRenameModal={showRenameModal}
showChangeParserModal={showChangeParserModal}
showSetMetaModal={showSetMetaModal}
record={record}
></ParsingActionCell>
),
@ -225,6 +236,15 @@ const KnowledgeFile = () => {
loading={webCrawlUploadLoading}
onOk={onWebCrawlUploadOk}
></WebCrawlModal>
{setMetaVisible && (
<SetMetaModal
visible={setMetaVisible}
hideModal={hideSetMetaModal}
onOk={onSetMetaModalOk}
loading={setMetaLoading}
initialMetaData={currentRecord.meta_fields}
></SetMetaModal>
)}
</div>
);
};

View File

@ -11,6 +11,7 @@ import {
import { Button, Dropdown, MenuProps, Space, Tooltip } from 'antd';
import { isParserRunning } from '../utils';
import { useCallback } from 'react';
import { DocumentType } from '../constant';
import styles from './index.less';
@ -19,6 +20,7 @@ interface IProps {
setCurrentRecord: (record: IDocumentInfo) => void;
showRenameModal: () => void;
showChangeParserModal: () => void;
showSetMetaModal: () => void;
}
const ParsingActionCell = ({
@ -26,6 +28,7 @@ const ParsingActionCell = ({
setCurrentRecord,
showRenameModal,
showChangeParserModal,
showSetMetaModal,
}: IProps) => {
const documentId = record.id;
const isRunning = isParserRunning(record.run);
@ -47,9 +50,9 @@ const ParsingActionCell = ({
});
};
const setRecord = () => {
const setRecord = useCallback(() => {
setCurrentRecord(record);
};
}, [record, setCurrentRecord]);
const onShowRenameModal = () => {
setRecord();
@ -60,17 +63,33 @@ const ParsingActionCell = ({
showChangeParserModal();
};
const onShowSetMetaModal = useCallback(() => {
setRecord();
showSetMetaModal();
}, [setRecord, showSetMetaModal]);
const chunkItems: MenuProps['items'] = [
{
key: '1',
label: (
<div>
<div className="flex flex-col">
<Button type="link" onClick={onShowChangeParserModal}>
{t('chunkMethod')}
</Button>
</div>
),
},
{ type: 'divider' },
{
key: '2',
label: (
<div className="flex flex-col">
<Button type="link" onClick={onShowSetMetaModal}>
{t('setMetaData')}
</Button>
</div>
),
},
];
return (

View File

@ -0,0 +1,81 @@
import { IModalProps } from '@/interfaces/common';
import { IDocumentInfo } from '@/interfaces/database/document';
import Editor, { loader } from '@monaco-editor/react';
import { Form, Modal } from 'antd';
import DOMPurify from 'dompurify';
import { useCallback, useEffect } from 'react';
import { useTranslation } from 'react-i18next';
loader.config({ paths: { vs: '/vs' } });
type FieldType = {
meta?: string;
};
/**
 * Modal dialog for editing a document's metadata as JSON text.
 *
 * The form holds a single `meta` field rendered with a Monaco JSON editor.
 * On confirm the field is validated (it must parse with `JSON.parse`) and the
 * raw string is passed to `onOk`.
 *
 * @param visible - Whether the modal is open.
 * @param hideModal - Invoked when the user cancels the dialog.
 * @param onOk - Invoked with the validated JSON string.
 * @param loading - When true, the OK button shows a pending state.
 * @param initialMetaData - Existing metadata used to pre-fill the editor;
 *   an empty object is shown when the document has none.
 */
export function SetMetaModal({
  visible,
  hideModal,
  onOk,
  loading,
  initialMetaData,
}: IModalProps<any> & { initialMetaData?: IDocumentInfo['meta_fields'] }) {
  const { t } = useTranslation();
  const [form] = Form.useForm();

  const handleOk = useCallback(async () => {
    let values: FieldType;
    try {
      values = await form.validateFields();
    } catch {
      // Validation failed: the form already renders the message under the
      // field, so just stop instead of leaking an unhandled rejection.
      return;
    }
    onOk?.(values.meta);
  }, [form, onOk]);

  useEffect(() => {
    // JSON.stringify(undefined) is undefined, which would leave the editor
    // empty and immediately invalid; fall back to an empty object instead.
    form.setFieldValue('meta', JSON.stringify(initialMetaData ?? {}, null, 4));
  }, [form, initialMetaData]);

  return (
    <Modal
      title={t('knowledgeDetails.setMetaData')}
      open={visible}
      onOk={handleOk}
      onCancel={hideModal}
      confirmLoading={loading}
    >
      <Form
        name="basic"
        initialValues={{ remember: true }}
        autoComplete="off"
        layout={'vertical'}
        form={form}
      >
        <Form.Item<FieldType>
          label={t('knowledgeDetails.metaData')}
          name="meta"
          rules={[
            {
              required: true,
              validator(_rule, value) {
                try {
                  JSON.parse(value);
                  return Promise.resolve();
                } catch (error) {
                  return Promise.reject(
                    new Error(t('knowledgeDetails.pleaseInputJson')),
                  );
                }
              },
            },
          ]}
          tooltip={
            // The tip is localized HTML; sanitize it before injecting.
            <div
              dangerouslySetInnerHTML={{
                __html: DOMPurify.sanitize(
                  t('knowledgeDetails.documentMetaTips'),
                ),
              }}
            ></div>
          }
        >
          <Editor height={200} defaultLanguage="json" theme="vs-dark" />
        </Form.Item>
      </Form>
    </Modal>
  );
}

View File

@ -31,6 +31,7 @@ const {
document_infos,
upload_and_parse,
listTagByKnowledgeIds,
setMeta,
} = api;
const methods = {
@ -55,7 +56,7 @@ const methods = {
url: kb_list,
method: 'get',
},
// 文件管理
// document manager
get_document_list: {
url: get_document_list,
method: 'get',
@ -100,6 +101,10 @@ const methods = {
url: document_infos,
method: 'post',
},
setMeta: {
url: setMeta,
method: 'post',
},
// chunk管理
chunk_list: {
url: chunk_list,

View File

@ -70,6 +70,7 @@ export default {
document_infos: `${api_host}/document/infos`,
upload_and_parse: `${api_host}/document/upload_and_parse`,
parse: `${api_host}/document/parse`,
setMeta: `${api_host}/document/set_meta`,
// chat
setDialog: `${api_host}/dialog/set`,