From a199572bf8ea786691ab4038cc6d3eb26c06986e Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Wed, 7 Aug 2024 12:17:02 +0800 Subject: [PATCH] add callback to entity extraction (#1843) ### What problem does this PR solve? ### Type of change - [x] Refactoring - [ ] --- graphrag/index.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/graphrag/index.py b/graphrag/index.py index a2914c19f..cdc77fb98 100644 --- a/graphrag/index.py +++ b/graphrag/index.py @@ -86,13 +86,15 @@ def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, ent for i in range(len(chunks)): tkn_cnt = num_tokens_from_string(chunks[i]) if cnt+tkn_cnt >= left_token_count and texts: - threads.append(exe.submit(ext, texts, {"entity_types": entity_types}, callback)) + for b in range(0, len(texts), 16): + threads.append(exe.submit(ext, ["\n".join(texts[b:b+16])], {"entity_types": entity_types}, callback)) texts = [] cnt = 0 texts.append(chunks[i]) cnt += tkn_cnt if texts: - threads.append(exe.submit(ext, texts)) + for b in range(0, len(texts), 16): + threads.append(exe.submit(ext, ["\n".join(texts[b:b+16])], {"entity_types": entity_types}, callback)) callback(0.5, "Extracting entities.") graphs = []