feat: Added explanation on the parsing method of knowledge graph #1594 (#1916)

### What problem does this PR solve?

feat: Added explanation on the parsing method of knowledge graph #1594

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
balibabu 2024-08-12 18:56:01 +08:00 committed by GitHub
parent 936d8ab7dd
commit eb8feaf20a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 226 additions and 7 deletions

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 747 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 134 KiB

View File

@ -27,7 +27,7 @@ const ParserListMap = new Map([
'one',
'qa',
'manual',
'knowledge_graph'
'knowledge_graph',
],
],
[
@ -67,7 +67,7 @@ const ParserListMap = new Map([
],
[['md'], ['naive', 'qa', 'knowledge_graph']],
[['json'], ['naive', 'knowledge_graph']],
[['eml'], ['email']]
[['eml'], ['email']],
]);
const getParserList = (

View File

@ -199,7 +199,7 @@ export default {
We assume manual has hierarchical section structure. We use the lowest section titles as pivots to slice documents.
So, the figures and tables in the same section will not be sliced apart, and chunk size might be large.
</p>`,
naive: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT</b>.</p>
naive: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML</b>.</p>
  <p>This method applies the naive way to chunk files: </p>
<p>
<li>Successive text will be sliced into pieces using vision detection model.</li>
@ -271,6 +271,13 @@ export default {
</p><p>
If you want to summarize something that needs all the context of an article and the selected LLM's context length covers the document length, you can try this method.
</p>`,
    knowledgeGraph: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML</b>.</p>
<p>After the files are chunked, the chunks are used to extract a knowledge graph and a mind map of the entire document. This method applies the naive way to chunk files:
successive text is sliced into pieces of approximately 512 tokens each.</p>
<p>Next, the chunks are transmitted to the LLM to extract the nodes and relationships of a knowledge graph, as well as a mind map.</p>
<p>Be mindful of the entity types you need to specify.</p>`,
useRaptor: 'Use RAPTOR to enhance retrieval',
useRaptorTip:
'Recursive Abstractive Processing for Tree-Organized Retrieval, please refer to https://huggingface.co/papers/2401.18059',

View File

@ -190,7 +190,7 @@ export default {
使
</p>`,
naive: `<p>支持的文件格式為<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT</b>。</p>
naive: `<p>支持的文件格式為<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>。</p>
<p></p>
<p>
<li>使</li>
@ -244,6 +244,13 @@ export default {
</p><p>
西LLM的上下文長度覆蓋了文檔長度
</p>`,
knowledgeGraph: `<p>支援的檔案格式為<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>
<p>使
512
<p>LLM以提取知識圖譜和思維導圖的節點和關係
<p></p></p>`,
useRaptor: '使用RAPTOR文件增強策略',
useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
prompt: '提示詞',

View File

@ -191,7 +191,7 @@ export default {
使
</p>`,
naive: `<p>支持的文件格式为<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT</b>。</p>
naive: `<p>支持的文件格式为<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>。</p>
<p></p>
<p>
<li>使</li>
@ -261,6 +261,13 @@ export default {
</p><p>
西LLM的上下文长度覆盖了文档长度
</p>`,
knowledgeGraph: `<p>支持的文件格式为<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>
<p>使
512 token </p>
<p> LLM </p>
</p>`,
useRaptor: '使用召回增强RAPTOR策略',
useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
prompt: '提示词',

View File

@ -3,6 +3,7 @@ import { useTranslate } from '@/hooks/common-hooks';
import { useSelectParserList } from '@/hooks/user-setting-hooks';
import { Col, Divider, Empty, Row, Typography } from 'antd';
import DOMPurify from 'dompurify';
import camelCase from 'lodash/camelCase';
import { useMemo } from 'react';
import styles from './index.less';
import { ImageMap } from './utils';
@ -18,7 +19,7 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
if (item) {
return {
title: item.label,
description: t(item.value),
description: t(camelCase(item.value)),
};
}
return { title: '', description: '' };

View File

@ -37,6 +37,9 @@ export const useSubmitKnowledgeConfiguration = (form: FormInstance) => {
};
};
// The value that does not need to be displayed in the analysis method Select
const HiddenFields = ['email', 'picture', 'audio'];
export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => {
const parserList = useSelectParserList();
const allOptions = useSelectLlmOptionsByModelType();
@ -62,7 +65,9 @@ export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => {
}, [form, knowledgeDetails]);
return {
parserList,
parserList: parserList.filter(
(x) => !HiddenFields.some((y) => y === x.value),
),
embeddingModelOptions: allOptions[LlmModelType.Embedding],
disabled: knowledgeDetails.chunk_num > 0,
};

View File

@ -15,6 +15,7 @@ export const ImageMap = {
resume: getImageName('resume', 2),
table: getImageName('table', 2),
one: getImageName('one', 2),
knowledge_graph: getImageName('knowledge-graph', 2),
};
export const TextMap = {