mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-05-29 17:45:33 +08:00
### What problem does this PR solve?

feat: Added explanation on the parsing method of knowledge graph #1594

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
936d8ab7dd
commit
eb8feaf20a
98
web/src/assets/svg/chunk-method/knowledge-graph-01.svg
Normal file
98
web/src/assets/svg/chunk-method/knowledge-graph-01.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 747 KiB |
93
web/src/assets/svg/chunk-method/knowledge-graph-02.svg
Normal file
93
web/src/assets/svg/chunk-method/knowledge-graph-02.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 134 KiB |
@ -27,7 +27,7 @@ const ParserListMap = new Map([
|
||||
'one',
|
||||
'qa',
|
||||
'manual',
|
||||
'knowledge_graph'
|
||||
'knowledge_graph',
|
||||
],
|
||||
],
|
||||
[
|
||||
@ -67,7 +67,7 @@ const ParserListMap = new Map([
|
||||
],
|
||||
[['md'], ['naive', 'qa', 'knowledge_graph']],
|
||||
[['json'], ['naive', 'knowledge_graph']],
|
||||
[['eml'], ['email']]
|
||||
[['eml'], ['email']],
|
||||
]);
|
||||
|
||||
const getParserList = (
|
||||
|
@ -199,7 +199,7 @@ export default {
|
||||
We assume manual has hierarchical section structure. We use the lowest section titles as pivots to slice documents.
|
||||
So, the figures and tables in the same section will not be sliced apart, and chunk size might be large.
|
||||
</p>`,
|
||||
naive: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT</b>.</p>
|
||||
naive: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML</b>.</p>
|
||||
<p>This method apply the naive ways to chunk files: </p>
|
||||
<p>
|
||||
<li>Successive text will be sliced into pieces using vision detection model.</li>
|
||||
@ -271,6 +271,13 @@ export default {
|
||||
</p><p>
|
||||
If you want to summarize something that needs all the context of an article and the selected LLM's context length covers the document length, you can try this method.
|
||||
</p>`,
|
||||
knowledgeGraph: `<p>Supported file formats are <b>DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML</b>
|
||||
|
||||
<p>After files are chunked, it uses the chunks to extract a knowledge graph and a mind map of the entire document. This method applies the naive way to chunk files:
|
||||
Successive text will be sliced into pieces each of which is around 512 token number.</p>
|
||||
<p>Next, the chunks will be transmitted to the LLM to extract nodes and relationships of a knowledge graph, and a mind map.</p>
|
||||
|
||||
Mind the entity type you need to specify.</p>`,
|
||||
useRaptor: 'Use RAPTOR to enhance retrieval',
|
||||
useRaptorTip:
|
||||
'Recursive Abstractive Processing for Tree-Organized Retrieval, please refer to https://huggingface.co/papers/2401.18059',
|
||||
|
@ -190,7 +190,7 @@ export default {
|
||||
我們假設手冊具有分層部分結構。我們使用最低的部分標題作為對文檔進行切片的樞軸。
|
||||
因此,同一部分中的圖和表不會被分割,並且塊大小可能會很大。
|
||||
</p>`,
|
||||
naive: `<p>支持的文件格式為<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT</b>。</p>
|
||||
naive: `<p>支持的文件格式為<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>。</p>
|
||||
<p>此方法將簡單的方法應用於塊文件:</p>
|
||||
<p>
|
||||
<li>系統將使用視覺檢測模型將連續文本分割成多個片段。</li>
|
||||
@ -244,6 +244,13 @@ export default {
|
||||
</p><p>
|
||||
如果你要總結的東西需要一篇文章的全部上下文,並且所選LLM的上下文長度覆蓋了文檔長度,你可以嘗試這種方法。
|
||||
</p>`,
|
||||
knowledgeGraph: `<p>支援的檔案格式為<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>
|
||||
|
||||
<p>文件分塊後,使用分塊擷取整個文件的知識圖譜和心智圖。此方法將簡單的方法應用於區塊檔案:
|
||||
連續的文字將被分割成多個片段,每個片段大約有 512 個令牌數。
|
||||
<p>接下來,區塊將傳送到LLM以提取知識圖譜和思維導圖的節點和關係。
|
||||
|
||||
<p>請注意您需要指定的條目類型。</p></p>`,
|
||||
useRaptor: '使用RAPTOR文件增強策略',
|
||||
useRaptorTip: '請參考 https://huggingface.co/papers/2401.18059',
|
||||
prompt: '提示詞',
|
||||
|
@ -191,7 +191,7 @@ export default {
|
||||
我们假设手册具有分层部分结构。 我们使用最低的部分标题作为对文档进行切片的枢轴。
|
||||
因此,同一部分中的图和表不会被分割,并且块大小可能会很大。
|
||||
</p>`,
|
||||
naive: `<p>支持的文件格式为<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT</b>。</p>
|
||||
naive: `<p>支持的文件格式为<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>。</p>
|
||||
<p>此方法将简单的方法应用于块文件:</p>
|
||||
<p>
|
||||
<li>系统将使用视觉检测模型将连续文本分割成多个片段。</li>
|
||||
@ -261,6 +261,13 @@ export default {
|
||||
</p><p>
|
||||
如果你要总结的东西需要一篇文章的全部上下文,并且所选LLM的上下文长度覆盖了文档长度,你可以尝试这种方法。
|
||||
</p>`,
|
||||
knowledgeGraph: `<p>支持的文件格式为<b>DOCX、EXCEL、PPT、IMAGE、PDF、TXT、MD、JSON、EML</b>
|
||||
|
||||
<p>文件分块后,使用分块提取整个文档的知识图谱和思维导图。此方法将简单的方法应用于分块文件:
|
||||
连续的文本将被切成大约 512 个 token 数的块。</p>
|
||||
<p>接下来,将分块传输到 LLM 以提取知识图谱和思维导图的节点和关系。</p>
|
||||
|
||||
注意您需要指定的条目类型。</p>`,
|
||||
useRaptor: '使用召回增强RAPTOR策略',
|
||||
useRaptorTip: '请参考 https://huggingface.co/papers/2401.18059',
|
||||
prompt: '提示词',
|
||||
|
@ -3,6 +3,7 @@ import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { useSelectParserList } from '@/hooks/user-setting-hooks';
|
||||
import { Col, Divider, Empty, Row, Typography } from 'antd';
|
||||
import DOMPurify from 'dompurify';
|
||||
import camelCase from 'lodash/camelCase';
|
||||
import { useMemo } from 'react';
|
||||
import styles from './index.less';
|
||||
import { ImageMap } from './utils';
|
||||
@ -18,7 +19,7 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
|
||||
if (item) {
|
||||
return {
|
||||
title: item.label,
|
||||
description: t(item.value),
|
||||
description: t(camelCase(item.value)),
|
||||
};
|
||||
}
|
||||
return { title: '', description: '' };
|
||||
|
@ -37,6 +37,9 @@ export const useSubmitKnowledgeConfiguration = (form: FormInstance) => {
|
||||
};
|
||||
};
|
||||
|
||||
// The value that does not need to be displayed in the analysis method Select
|
||||
const HiddenFields = ['email', 'picture', 'audio'];
|
||||
|
||||
export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => {
|
||||
const parserList = useSelectParserList();
|
||||
const allOptions = useSelectLlmOptionsByModelType();
|
||||
@ -62,7 +65,9 @@ export const useFetchKnowledgeConfigurationOnMount = (form: FormInstance) => {
|
||||
}, [form, knowledgeDetails]);
|
||||
|
||||
return {
|
||||
parserList,
|
||||
parserList: parserList.filter(
|
||||
(x) => !HiddenFields.some((y) => y === x.value),
|
||||
),
|
||||
embeddingModelOptions: allOptions[LlmModelType.Embedding],
|
||||
disabled: knowledgeDetails.chunk_num > 0,
|
||||
};
|
||||
|
@ -15,6 +15,7 @@ export const ImageMap = {
|
||||
resume: getImageName('resume', 2),
|
||||
table: getImageName('table', 2),
|
||||
one: getImageName('one', 2),
|
||||
knowledge_graph: getImageName('knowledge-graph', 2),
|
||||
};
|
||||
|
||||
export const TextMap = {
|
||||
|
Loading…
x
Reference in New Issue
Block a user