From af770c5ced2151d99e4f5a780f2a89d6b8cf1f8a Mon Sep 17 00:00:00 2001 From: liuzhenghua <1090179900@qq.com> Date: Mon, 28 Apr 2025 13:31:04 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20Optimize=20GraphRAG=E2=80=99s=20LOOP=5F?= =?UTF-8?q?PROMPT=20(#7356)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? 当前graphrag的LOOP_PROMPT,会导致模型输出Y之后,继续补充了实体和关系,比较浪费时间。参照[graph rag](https://github.com/microsoft/graphrag/blob/main/graphrag/prompts/index/extract_graph.py)最新的代码,修改了LOOP_PROMPT,经过验证,修改后可以稳定地输出Y停止。 Currently, GraphRAG’s LOOP_PROMPT causes the model to keep appending entities and relationships even after outputting “Y,” which wastes time. Referring to the latest code in [graphRAG](https://github.com/microsoft/graphrag/blob/main/graphrag/prompts/index/extract_graph.py), I modified the LOOP_PROMPT, and after verification, with the updated prompt the model reliably outputs just “Y” and stops. ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [x] Performance Improvement - [ ] Other (please describe): Co-authored-by: liuzhenghua-jk --- graphrag/general/graph_extractor.py | 3 ++- graphrag/general/graph_prompt.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/graphrag/general/graph_extractor.py b/graphrag/general/graph_extractor.py index 375327bfa..88c1f1e62 100644 --- a/graphrag/general/graph_extractor.py +++ b/graphrag/general/graph_extractor.py @@ -130,8 +130,9 @@ class GraphExtractor(Extractor): async with chat_limiter: continuation = await trio.to_thread.run_sync(lambda: self._chat("", history, {"temperature": 0.8})) token_count += num_tokens_from_string("\n".join([m["content"] for m in history]) + response) - if continuation != "YES": + if continuation != "Y": break + history.append({"role": "assistant", "content": "Y"}) records = split_string_by_multi_markers( 
results, diff --git a/graphrag/general/graph_prompt.py b/graphrag/general/graph_prompt.py index 3472bc734..54ca3a1d3 100644 --- a/graphrag/general/graph_prompt.py +++ b/graphrag/general/graph_prompt.py @@ -106,7 +106,7 @@ Text: {input_text} Output:""" CONTINUE_PROMPT = "MANY entities were missed in the last extraction. Add them below using the same format:\n" -LOOP_PROMPT = "It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.\n" +LOOP_PROMPT = "It appears some entities may have still been missed. Answer Y if there are still entities that need to be added, or N if there are none. Please answer with a single letter Y or N.\n" SUMMARIZE_DESCRIPTIONS_PROMPT = """ You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.