Feat: Reparse a file shall reuse existing chunks if possible #3793 (#4021)

### What problem does this PR solve?

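Feat: Reparsing a file should reuse its existing chunks where possible (#3793). When a document that already has chunks is re-run, the UI now asks whether to clear the existing chunks first; confirming sends `delete: true` with the run request, while cancelling runs the parse with `delete: false`. Documents with no chunks are run directly without the prompt.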

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2024-12-13 16:55:13 +08:00 committed by GitHub
parent 1defe0b19b
commit 93635674c3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 40 additions and 11 deletions

View File

@ -313,13 +313,16 @@ export const useRunNextDocument = () => {
mutationFn: async ({ mutationFn: async ({
documentIds, documentIds,
run, run,
shouldDelete,
}: { }: {
documentIds: string[]; documentIds: string[];
run: number; run: number;
shouldDelete: boolean;
}) => { }) => {
const ret = await kbService.document_run({ const ret = await kbService.document_run({
doc_ids: documentIds, doc_ids: documentIds,
run, run,
delete: shouldDelete,
}); });
const code = get(ret, 'data.code'); const code = get(ret, 'data.code');
if (code === 0) { if (code === 0) {

View File

@ -165,6 +165,7 @@ export default {
autoKeywordsTip: `Automatically extract N keywords for each chunk to increase their ranking for queries containing those keywords. You can check or update the added keywords for a chunk from the chunk list. Be aware that extra tokens will be consumed by the LLM specified in 'System model settings'.`, autoKeywordsTip: `Automatically extract N keywords for each chunk to increase their ranking for queries containing those keywords. You can check or update the added keywords for a chunk from the chunk list. Be aware that extra tokens will be consumed by the LLM specified in 'System model settings'.`,
autoQuestions: 'Auto-question', autoQuestions: 'Auto-question',
autoQuestionsTip: `Automatically extract N questions for each chunk to increase their ranking for queries containing those questions. You can check or update the added questions for a chunk from the chunk list. This feature will not disrupt the chunking process if an error occurs, except that it may add an empty result to the original chunk. Be aware that extra tokens will be consumed by the LLM specified in 'System model settings'.`, autoQuestionsTip: `Automatically extract N questions for each chunk to increase their ranking for queries containing those questions. You can check or update the added questions for a chunk from the chunk list. This feature will not disrupt the chunking process if an error occurs, except that it may add an empty result to the original chunk. Be aware that extra tokens will be consumed by the LLM specified in 'System model settings'.`,
redo: 'Do you want to clear the existing {{chunkNum}} chunks?',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
titleDescription: titleDescription:

View File

@ -161,6 +161,7 @@ export default {
autoKeywordsTip: `在查詢此類關鍵字時,為每個區塊提取 N 個關鍵字以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。 `, autoKeywordsTip: `在查詢此類關鍵字時,為每個區塊提取 N 個關鍵字以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。 `,
autoQuestions: '自動問題', autoQuestions: '自動問題',
autoQuestionsTip: `在查詢此類問題時,為每個區塊提取 N 個問題以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。如果發生錯誤此功能不會破壞整個分塊過程除了將空結果新增至原始區塊。 `, autoQuestionsTip: `在查詢此類問題時,為每個區塊提取 N 個問題以提高其排名分數。在「系統模型設定」中設定的 LLM 將消耗額外的 token。您可以在區塊清單中查看結果。如果發生錯誤此功能不會破壞整個分塊過程除了將空結果新增至原始區塊。 `,
redo: '是否清空已有 {{chunkNum}}個 chunk',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。', titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',

View File

@ -162,6 +162,7 @@ export default {
autoKeywordsTip: `在查询此类关键词时,为每个块提取 N 个关键词以提高其排名得分。在“系统模型设置”中设置的 LLM 将消耗额外的 token。您可以在块列表中查看结果。`, autoKeywordsTip: `在查询此类关键词时,为每个块提取 N 个关键词以提高其排名得分。在“系统模型设置”中设置的 LLM 将消耗额外的 token。您可以在块列表中查看结果。`,
autoQuestions: '自动问题', autoQuestions: '自动问题',
autoQuestionsTip: `在查询此类问题时,为每个块提取 N 个问题以提高其排名得分。在“系统模型设置”中设置的 LLM 将消耗额外的 token。您可以在块列表中查看结果。如果发生错误此功能不会破坏整个分块过程除了将空结果添加到原始块。`, autoQuestionsTip: `在查询此类问题时,为每个块提取 N 个问题以提高其排名得分。在“系统模型设置”中设置的 LLM 将消耗额外的 token。您可以在块列表中查看结果。如果发生错误此功能不会破坏整个分块过程除了将空结果添加到原始块。`,
redo: '是否清空已有 {{chunkNum}}个 chunk',
}, },
knowledgeConfiguration: { knowledgeConfiguration: {
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。', titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',

View File

@ -213,6 +213,7 @@ export const useHandleRunDocumentByIds = (id: string) => {
const handleRunDocumentByIds = async ( const handleRunDocumentByIds = async (
documentId: string, documentId: string,
isRunning: boolean, isRunning: boolean,
shouldDelete: boolean = false,
) => { ) => {
if (isLoading) { if (isLoading) {
return; return;
@ -222,6 +223,7 @@ export const useHandleRunDocumentByIds = (id: string) => {
await runDocumentByIds({ await runDocumentByIds({
documentIds: [documentId], documentIds: [documentId],
run: isRunning ? 2 : 1, run: isRunning ? 2 : 1,
shouldDelete,
}); });
setCurrentId(''); setCurrentId('');
} catch (error) { } catch (error) {

View File

@ -3,7 +3,15 @@ import { ReactComponent as RefreshIcon } from '@/assets/svg/refresh.svg';
import { ReactComponent as RunIcon } from '@/assets/svg/run.svg'; import { ReactComponent as RunIcon } from '@/assets/svg/run.svg';
import { useTranslate } from '@/hooks/common-hooks'; import { useTranslate } from '@/hooks/common-hooks';
import { IDocumentInfo } from '@/interfaces/database/document'; import { IDocumentInfo } from '@/interfaces/database/document';
import { Badge, DescriptionsProps, Flex, Popover, Space, Tag } from 'antd'; import {
Badge,
DescriptionsProps,
Flex,
Popconfirm,
Popover,
Space,
Tag,
} from 'antd';
import classNames from 'classnames'; import classNames from 'classnames';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import reactStringReplace from 'react-string-replace'; import reactStringReplace from 'react-string-replace';
@ -92,9 +100,11 @@ export const ParsingStatusCell = ({ record }: IProps) => {
const label = t(`knowledgeDetails.runningStatus${text}`); const label = t(`knowledgeDetails.runningStatus${text}`);
const handleOperationIconClick = () => { const handleOperationIconClick =
handleRunDocumentByIds(record.id, isRunning); (shouldDelete: boolean = false) =>
}; () => {
handleRunDocumentByIds(record.id, isRunning, shouldDelete);
};
return record.type === DocumentType.Virtual ? null : ( return record.type === DocumentType.Virtual ? null : (
<Flex justify={'space-between'} align="center"> <Flex justify={'space-between'} align="center">
@ -111,14 +121,25 @@ export const ParsingStatusCell = ({ record }: IProps) => {
)} )}
</Tag> </Tag>
</Popover> </Popover>
<div <Popconfirm
onClick={handleOperationIconClick} title={t(`knowledgeDetails.redo`, { chunkNum: record.chunk_num })}
className={classNames(styles.operationIcon, { onConfirm={handleOperationIconClick(true)}
[styles.operationIconSpin]: loading, onCancel={handleOperationIconClick(false)}
})} disabled={record.chunk_num === 0}
okText={t('common.ok')}
cancelText={t('common.cancel')}
> >
<OperationIcon /> <div
</div> className={classNames(styles.operationIcon, {
[styles.operationIconSpin]: loading,
})}
onClick={
record.chunk_num === 0 ? handleOperationIconClick(false) : () => {}
}
>
<OperationIcon />
</div>
</Popconfirm>
</Flex> </Flex>
); );
}; };