mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-22 14:10:01 +08:00
refine mindmap (#1817)
### What problem does this PR solve?

#1594

### Type of change

- [x] Refactoring
This commit is contained in:
parent
5650442b0b
commit
3fd7db40ea
@@ -21,6 +21,7 @@ from typing import List
|
||||
import networkx as nx
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.db.services.user_service import TenantService
|
||||
from graphrag.community_reports_extractor import CommunityReportsExtractor
|
||||
from graphrag.entity_resolution import EntityResolution
|
||||
from graphrag.graph_extractor import GraphExtractor
|
||||
@@ -30,6 +31,11 @@ from rag.utils import num_tokens_from_string
|
||||
|
||||
|
||||
def be_children(obj: dict, keyset:set):
|
||||
if isinstance(obj, str):
|
||||
obj = [obj]
|
||||
if isinstance(obj, list):
|
||||
for i in obj: keyset.add(i)
|
||||
return [{"id": i, "children":[]} for i in obj]
|
||||
arr = []
|
||||
for k,v in obj.items():
|
||||
k = re.sub(r"\*+", "", k)
|
||||
@@ -65,7 +71,8 @@ def graph_merge(g1, g2):
|
||||
|
||||
|
||||
def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, entity_types=["organization", "person", "location", "event", "time"]):
|
||||
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT)
|
||||
_, tenant = TenantService.get_by_id(tenant_id)
|
||||
llm_bdl = LLMBundle(tenant_id, LLMType.CHAT, tenant.llm_id)
|
||||
ext = GraphExtractor(llm_bdl)
|
||||
left_token_count = llm_bdl.max_length - ext.prompt_token_count - 1024
|
||||
left_token_count = max(llm_bdl.max_length * 0.8, left_token_count)
|
||||
|
@@ -13,7 +13,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import collections
|
||||
import logging
|
||||
import re
|
||||
import logging
|
||||
import traceback
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
@@ -65,7 +67,7 @@ class MindMapExtractor:
|
||||
try:
|
||||
exe = ThreadPoolExecutor(max_workers=12)
|
||||
threads = []
|
||||
token_count = self._llm.max_length * 0.7
|
||||
token_count = max(self._llm.max_length * 0.8, self._llm.max_length-512)
|
||||
texts = []
|
||||
res = []
|
||||
cnt = 0
|
||||
@@ -122,6 +124,19 @@ class MindMapExtractor:
|
||||
continue
|
||||
return data
|
||||
|
||||
def _todict(self, layer:collections.OrderedDict):
    """Recursively turn ``layer`` into plain ``dict`` objects.

    An ``OrderedDict`` is shallow-copied into a regular ``dict``; any other
    value is used as-is. Every value of the resulting mapping is replaced by
    the result of a recursive ``_todict`` call, and the final object is
    handed to ``self._list_to_kv`` before being returned.
    """
    plain = dict(layer) if isinstance(layer, collections.OrderedDict) else layer

    try:
        for name, child in plain.items():
            plain[name] = self._todict(child)
    except AttributeError:
        # Objects without ``.items()`` are not descended into. The handler
        # also catches an AttributeError raised from a nested call, which
        # stops the loop at that point.
        pass

    return self._list_to_kv(plain)
|
||||
|
||||
def _process_document(
|
||||
self, text: str, prompt_variables: dict[str, str]
|
||||
) -> str:
|
||||
@@ -132,6 +147,7 @@ class MindMapExtractor:
|
||||
text = perform_variable_replacements(self._mind_map_prompt, variables=variables)
|
||||
gen_conf = {"temperature": 0.5}
|
||||
response = self._llm.chat(text, [], gen_conf)
|
||||
response = re.sub(r"```[^\n]*", "", response)
|
||||
print(response)
|
||||
print("---------------------------------------------------\n", markdown_to_json.dictify(response))
|
||||
return dict(markdown_to_json.dictify(response))
|
||||
print("---------------------------------------------------\n", self._todict(markdown_to_json.dictify(response)))
|
||||
return self._todict(markdown_to_json.dictify(response))
|
||||
|
@@ -14,28 +14,20 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
MIND_MAP_EXTRACTION_PROMPT = """
|
||||
- Role: You're a talent text processor.
|
||||
- Role: You're a talent text processor to summarize a piece of text into a mind map.
|
||||
|
||||
- Step of task:
|
||||
1. Generate a title for user's 'TEXT'。
|
||||
2. Classify the 'TEXT' into sections as you see fit.
|
||||
3. If the subject matter is really complex, split them into sub-sections.
|
||||
- Step of task:
|
||||
1. Generate a title for user's 'TEXT'。
|
||||
2. Classify the 'TEXT' into sections of a mind map.
|
||||
3. If the subject matter is really complex, split them into sub-sections and sub-subsections.
|
||||
4. Add a shot content summary of the bottom level section.
|
||||
|
||||
- Output requirement:
|
||||
- Always try to maximize the number of sub-sections.
|
||||
- In language of 'Text'
|
||||
- MUST IN FORMAT OF MARKDOWN
|
||||
|
||||
- Output requirement:
|
||||
- In language of
|
||||
- MUST IN FORMAT OF MARKDOWN
|
||||
|
||||
Output:
|
||||
## <Title>
|
||||
<Section Name>
|
||||
<Section Name>
|
||||
<Subsection Name>
|
||||
<Subsection Name>
|
||||
<Section Name>
|
||||
<Subsection Name>
|
||||
|
||||
-TEXT-
|
||||
{input_text}
|
||||
|
||||
Output:
|
||||
"""
|
Loading…
x
Reference in New Issue
Block a user