diff --git a/graphrag/index.py b/graphrag/index.py index a2914c19f..cdc77fb98 100644 --- a/graphrag/index.py +++ b/graphrag/index.py @@ -86,13 +86,15 @@ def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, ent for i in range(len(chunks)): tkn_cnt = num_tokens_from_string(chunks[i]) if cnt+tkn_cnt >= left_token_count and texts: - threads.append(exe.submit(ext, texts, {"entity_types": entity_types}, callback)) + for b in range(0, len(texts), 16): + threads.append(exe.submit(ext, ["\n".join(texts[b:b+16])], {"entity_types": entity_types}, callback)) texts = [] cnt = 0 texts.append(chunks[i]) cnt += tkn_cnt if texts: - threads.append(exe.submit(ext, texts)) + for b in range(0, len(texts), 16): + threads.append(exe.submit(ext, ["\n".join(texts[b:b+16])], {"entity_types": entity_types}, callback)) callback(0.5, "Extracting entities.") graphs = []