diff --git a/graphrag/general/community_reports_extractor.py b/graphrag/general/community_reports_extractor.py index e6c2c4b80..aa04a82b4 100644 --- a/graphrag/general/community_reports_extractor.py +++ b/graphrag/general/community_reports_extractor.py @@ -70,6 +70,8 @@ class CommunityReportsExtractor(Extractor): weight = ents["weight"] ents = ents["nodes"] ent_df = pd.DataFrame(self._get_entity_(ents)).dropna()#[{"entity": n, **graph.nodes[n]} for n in ents]) + if ent_df.empty: + continue ent_df["entity"] = ent_df["entity_name"] del ent_df["entity_name"] rela_df = pd.DataFrame(self._get_relation_(list(ent_df["entity"]), list(ent_df["entity"]), 10000)) diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py index 401a1c0cc..ffcb889a5 100644 --- a/graphrag/general/extractor.py +++ b/graphrag/general/extractor.py @@ -99,6 +99,7 @@ class Extractor: with ThreadPoolExecutor(max_workers=max_workers) as exe: threads = [] for i, (cid, ck) in enumerate(chunks): + ck = truncate(ck, int(self._llm.max_length*0.8)) threads.append( exe.submit(self._process_single_content, (cid, ck))) @@ -241,5 +242,5 @@ class Extractor: ) use_prompt = prompt_template.format(**context_base) logging.info(f"Trigger summary: {entity_or_relation_name}") - summary = self._chat(use_prompt, [{"role": "assistant", "content": "Output: "}], {"temperature": 0.8}) + summary = self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8}) return summary diff --git a/graphrag/general/graph_extractor.py b/graphrag/general/graph_extractor.py index b709c3af4..acfccff15 100644 --- a/graphrag/general/graph_extractor.py +++ b/graphrag/general/graph_extractor.py @@ -120,7 +120,7 @@ class GraphExtractor(Extractor): token_count += num_tokens_from_string(hint_prompt + response) results = response or "" - history = [{"role": "system", "content": hint_prompt}, {"role": "assistant", "content": response}] + history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}] # Repeat to ensure we maximize entity count for i in range(self._max_gleanings): diff --git a/graphrag/light/graph_extractor.py b/graphrag/light/graph_extractor.py index c0232b592..c81c4726d 100644 --- a/graphrag/light/graph_extractor.py +++ b/graphrag/light/graph_extractor.py @@ -91,7 +91,7 @@ class GraphExtractor(Extractor): ).format(**self._context_base, input_text=content) try: - gen_conf = {"temperature": 0.3} + gen_conf = {"temperature": 0.8} final_result = self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf) token_count += num_tokens_from_string(hint_prompt + final_result) history = pack_user_ass_to_openai_messages(hint_prompt, final_result)