Fix too long context issue. (#4735)

### What problem does this PR solve?

#4728

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Kevin Hu, 2025-02-06 11:37:23 +08:00, committed by GitHub
commit 2a07eb69a7 (parent a3a70431f3)
4 changed files with 6 additions and 3 deletions

---

@@ -70,6 +70,8 @@ class CommunityReportsExtractor(Extractor):
             weight = ents["weight"]
             ents = ents["nodes"]
             ent_df = pd.DataFrame(self._get_entity_(ents)).dropna()#[{"entity": n, **graph.nodes[n]} for n in ents])
+            if ent_df.empty:
+                continue
             ent_df["entity"] = ent_df["entity_name"]
             del ent_df["entity_name"]
             rela_df = pd.DataFrame(self._get_relation_(list(ent_df["entity"]), list(ent_df["entity"]), 10000))
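The new guard matters because a DataFrame built from an empty list has no columns at all, so the column rename right after it would raise a `KeyError`. A minimal sketch of the failure mode (the empty entity rows here are made up for illustration):

```python
import pandas as pd

# Hypothetical stand-in for self._get_entity_(ents): suppose every entity
# row was dropped, leaving a frame with no rows and no columns.
ent_df = pd.DataFrame([]).dropna()

if ent_df.empty:
    # Without the guard added above, the rename below would raise
    # KeyError: 'entity_name', because an empty frame has no columns.
    print("community has no usable entities; skipping")
else:
    ent_df["entity"] = ent_df["entity_name"]
    del ent_df["entity_name"]
```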

---

@@ -99,6 +99,7 @@ class Extractor:
         with ThreadPoolExecutor(max_workers=max_workers) as exe:
             threads = []
             for i, (cid, ck) in enumerate(chunks):
+                ck = truncate(ck, int(self._llm.max_length*0.8))
                 threads.append(
                     exe.submit(self._process_single_content, (cid, ck)))
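This is the core of the fix: each chunk is clipped to 80% of the model's context length before extraction, leaving headroom for the prompt template and the reply. A rough sketch of a token-based `truncate()`, assuming tiktoken; the project's own helper may be implemented differently:

```python
import tiktoken

def truncate(text: str, max_token: int) -> str:
    # Illustrative stand-in for the project's truncate() helper: clip the
    # chunk to at most max_token tokens so prompt + chunk fit the window.
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    return text if len(tokens) <= max_token else enc.decode(tokens[:max_token])

max_length = 8192                                # hypothetical context limit
chunk = "a very long extracted chunk ... " * 1000
chunk = truncate(chunk, int(max_length * 0.8))   # leave ~20% headroom
```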
@@ -241,5 +242,5 @@ class Extractor:
         )
         use_prompt = prompt_template.format(**context_base)
         logging.info(f"Trigger summary: {entity_or_relation_name}")
-        summary = self._chat(use_prompt, [{"role": "assistant", "content": "Output: "}], {"temperature": 0.8})
+        summary = self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8})
         return summary
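This hunk (and the similar one below) flips the priming message from an assistant turn to a user turn: several chat-completion backends reject, or quietly mishandle, a conversation whose final message is from the assistant. A sketch of the message shape now being sent, assuming `_chat`'s first argument is the system prompt (inferred from the call site):

```python
def build_summary_messages(use_prompt: str) -> list[dict]:
    # Ending on a user turn keeps the request valid for strict providers;
    # the "Output: " cue still prompts the model to answer immediately.
    return [
        {"role": "system", "content": use_prompt},
        {"role": "user", "content": "Output: "},
    ]

messages = build_summary_messages("Summarize the following entity descriptions ...")
```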

---

@@ -120,7 +120,7 @@ class GraphExtractor(Extractor):
         token_count += num_tokens_from_string(hint_prompt + response)
         results = response or ""
-        history = [{"role": "system", "content": hint_prompt}, {"role": "assistant", "content": response}]
+        history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]
         # Repeat to ensure we maximize entity count
         for i in range(self._max_gleanings):
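The same role fix, applied to the history that seeds the gleaning loop. For context, a compact sketch of the gleaning pattern this history feeds; the `chat` callable and the continue prompt are placeholders, not RAGFlow's actual ones:

```python
from typing import Callable

def glean(chat: Callable[[list[dict]], str], hint_prompt: str,
          first_response: str, max_gleanings: int) -> str:
    # Replay the model's previous output as a *user* turn so the history
    # stays well-formed, then keep asking for entities it may have missed.
    history = [
        {"role": "system", "content": hint_prompt},
        {"role": "user", "content": first_response},
    ]
    results = first_response or ""
    for _ in range(max_gleanings):
        history.append({"role": "user",
                        "content": "Some entities may have been missed; continue the extraction."})
        response = chat(history)
        results += response
        history.append({"role": "user", "content": response})
    return results
```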

---

@@ -91,7 +91,7 @@ class GraphExtractor(Extractor):
         ).format(**self._context_base, input_text=content)
         try:
-            gen_conf = {"temperature": 0.3}
+            gen_conf = {"temperature": 0.8}
             final_result = self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf)
             token_count += num_tokens_from_string(hint_prompt + final_result)
             history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
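The temperature bump from 0.3 to 0.8 trades determinism for recall on the first extraction pass. For reference, a sketch of how such a `gen_conf` maps onto an OpenAI-style call; the client and model name are placeholders, not necessarily what RAGFlow uses:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set; model name is illustrative
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "entity-extraction hint prompt ..."},
        {"role": "user", "content": "Output:"},
    ],
    temperature=0.8,  # raised from 0.3 by this PR for the extraction pass
)
final_result = resp.choices[0].message.content
```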