Fix too long context issue. (#4735)

### What problem does this PR solve?

#4728

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Kevin Hu, 2025-02-06 11:37:23 +08:00, committed by GitHub
commit 2a07eb69a7 (parent a3a70431f3)
4 changed files with 6 additions and 3 deletions

---

@@ -70,6 +70,8 @@ class CommunityReportsExtractor(Extractor):
             weight = ents["weight"]
             ents = ents["nodes"]
             ent_df = pd.DataFrame(self._get_entity_(ents)).dropna()#[{"entity": n, **graph.nodes[n]} for n in ents])
+            if ent_df.empty:
+                continue
             ent_df["entity"] = ent_df["entity_name"]
             del ent_df["entity_name"]
             rela_df = pd.DataFrame(self._get_relation_(list(ent_df["entity"]), list(ent_df["entity"]), 10000))
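The new guard matters because a DataFrame built from an empty list has no columns at all, so the column rename right after it would raise a `KeyError`. A minimal sketch of the failure mode (the empty entity rows here are made up for illustration):

```python
import pandas as pd

# Hypothetical stand-in for self._get_entity_(ents): suppose every entity
# row was dropped, leaving a frame with no rows and no columns.
ent_df = pd.DataFrame([]).dropna()

if ent_df.empty:
    # Without the guard added above, the rename below would raise
    # KeyError: 'entity_name', because an empty frame has no columns.
    print("community has no usable entities; skipping")
else:
    ent_df["entity"] = ent_df["entity_name"]
    del ent_df["entity_name"]
```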

---

@@ -99,6 +99,7 @@ class Extractor:
         with ThreadPoolExecutor(max_workers=max_workers) as exe:
             threads = []
             for i, (cid, ck) in enumerate(chunks):
+                ck = truncate(ck, int(self._llm.max_length*0.8))
                 threads.append(
                     exe.submit(self._process_single_content, (cid, ck)))
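This is the core of the fix: each chunk is clipped to 80% of the model's context length before extraction, leaving headroom for the prompt template and the reply. A rough sketch of a token-based `truncate()`, assuming tiktoken; the project's own helper may be implemented differently:

```python
import tiktoken

def truncate(text: str, max_token: int) -> str:
    # Illustrative stand-in for the project's truncate() helper: clip the
    # chunk to at most max_token tokens so prompt + chunk fit the window.
    enc = tiktoken.get_encoding("cl100k_base")
    tokens = enc.encode(text)
    return text if len(tokens) <= max_token else enc.decode(tokens[:max_token])

max_length = 8192                                # hypothetical context limit
chunk = "a very long extracted chunk ... " * 1000
chunk = truncate(chunk, int(max_length * 0.8))   # leave ~20% headroom
```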
@@ -241,5 +242,5 @@ class Extractor:
         )
         use_prompt = prompt_template.format(**context_base)
         logging.info(f"Trigger summary: {entity_or_relation_name}")
-        summary = self._chat(use_prompt, [{"role": "assistant", "content": "Output: "}], {"temperature": 0.8})
+        summary = self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8})
         return summary
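This hunk (and the similar one below) flips the priming message from an assistant turn to a user turn: several chat-completion backends reject, or quietly mishandle, a conversation whose final message is from the assistant. A sketch of the message shape now being sent, assuming `_chat`'s first argument is the system prompt (inferred from the call site):

```python
def build_summary_messages(use_prompt: str) -> list[dict]:
    # Ending on a user turn keeps the request valid for strict providers;
    # the "Output: " cue still prompts the model to answer immediately.
    return [
        {"role": "system", "content": use_prompt},
        {"role": "user", "content": "Output: "},
    ]

messages = build_summary_messages("Summarize the following entity descriptions ...")
```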

---

@@ -120,7 +120,7 @@ class GraphExtractor(Extractor):
         token_count += num_tokens_from_string(hint_prompt + response)
         results = response or ""
-        history = [{"role": "system", "content": hint_prompt}, {"role": "assistant", "content": response}]
+        history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]
         # Repeat to ensure we maximize entity count
         for i in range(self._max_gleanings):
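The same role fix, applied to the history that seeds the gleaning loop. For context, a compact sketch of the gleaning pattern this history feeds; the `chat` callable and the continue prompt are placeholders, not RAGFlow's actual ones:

```python
from typing import Callable

def glean(chat: Callable[[list[dict]], str], hint_prompt: str,
          first_response: str, max_gleanings: int) -> str:
    # Replay the model's previous output as a *user* turn so the history
    # stays well-formed, then keep asking for entities it may have missed.
    history = [
        {"role": "system", "content": hint_prompt},
        {"role": "user", "content": first_response},
    ]
    results = first_response or ""
    for _ in range(max_gleanings):
        history.append({"role": "user",
                        "content": "Some entities may have been missed; continue the extraction."})
        response = chat(history)
        results += response
        history.append({"role": "user", "content": response})
    return results
```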

---

@@ -91,7 +91,7 @@ class GraphExtractor(Extractor):
         ).format(**self._context_base, input_text=content)
         try:
-            gen_conf = {"temperature": 0.3}
+            gen_conf = {"temperature": 0.8}
             final_result = self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf)
             token_count += num_tokens_from_string(hint_prompt + final_result)
             history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
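The temperature bump from 0.3 to 0.8 trades determinism for recall on the first extraction pass. For reference, a sketch of how such a `gen_conf` maps onto an OpenAI-style call; the client and model name are placeholders, not necessarily what RAGFlow uses:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set; model name is illustrative
resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "entity-extraction hint prompt ..."},
        {"role": "user", "content": "Output:"},
    ],
    temperature=0.8,  # raised from 0.3 by this PR for the extraction pass
)
final_result = resp.choices[0].message.content
```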