From da2d8b8267637d041e2557c0e772a2eee6b58a6f Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 13 Aug 2024 11:21:30 +0800 Subject: [PATCH] boost parallelism of graphrag (#1924) ### What problem does this PR solve? ### Type of change - [x] Performance Improvement --- graphrag/index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphrag/index.py b/graphrag/index.py index ea9c354ad..f2835cc7a 100644 --- a/graphrag/index.py +++ b/graphrag/index.py @@ -61,11 +61,11 @@ def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, ent assert left_token_count > 0, f"The LLM context length({llm_bdl.max_length}) is smaller than prompt({ext.prompt_token_count})" - BATCH_SIZE=1 + BATCH_SIZE=4 texts, graphs = [], [] cnt = 0 threads = [] - exe = ThreadPoolExecutor(max_workers=12) + exe = ThreadPoolExecutor(max_workers=50) for i in range(len(chunks)): tkn_cnt = num_tokens_from_string(chunks[i]) if cnt+tkn_cnt >= left_token_count and texts: