From d2213141e056c624a96b06df18230344c3175103 Mon Sep 17 00:00:00 2001
From: H <43509927+guoyuhao2330@users.noreply.github.com>
Date: Mon, 5 Aug 2024 14:44:54 +0800
Subject: [PATCH] Fix graphrag callback (#1806)

### What problem does this PR solve?

#1800

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 graphrag/index.py          | 4 ++--
 rag/app/knowledge_graph.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/graphrag/index.py b/graphrag/index.py
index a60834719..e0b5a1993 100644
--- a/graphrag/index.py
+++ b/graphrag/index.py
@@ -45,7 +45,7 @@ def graph_merge(g1, g2):
     g = g2.copy()
     for n, attr in g1.nodes(data=True):
         if n not in g2.nodes():
-            g2.add_node(n, **attr)
+            g.add_node(n, **attr)
             continue
 
         g.nodes[n]["weight"] += 1
@@ -75,7 +75,7 @@ def build_knowlege_graph_chunks(tenant_id: str, chunks: List[str], callback, ent
     cnt = 0
     threads = []
     exe = ThreadPoolExecutor(max_workers=12)
-    for i in range(len(chunks[:512])):
+    for i in range(len(chunks)):
         tkn_cnt = num_tokens_from_string(chunks[i])
         if cnt+tkn_cnt >= left_token_count and texts:
             threads.append(exe.submit(ext, texts, {"entity_types": entity_types}))
diff --git a/rag/app/knowledge_graph.py b/rag/app/knowledge_graph.py
index a8775f9cc..9f47769b0 100644
--- a/rag/app/knowledge_graph.py
+++ b/rag/app/knowledge_graph.py
@@ -13,7 +13,7 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     eng = lang.lower() == "english"
 
     parser_config["layout_recognize"] = False
-    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, parser_config=parser_config)
+    sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True, callback=callback, parser_config=parser_config)
     chunks = build_knowlege_graph_chunks(tenant_id, sections, callback,
                                          parser_config.get("entity_types", ["organization", "person", "location", "event", "time"])
                                          )
@@ -27,4 +27,4 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
     doc["title_sm_tks"] = rag_tokenizer.fine_grained_tokenize(doc["title_tks"])
     chunks.extend(tokenize_chunks(sections, doc, eng))
 
-    return chunks
\ No newline at end of file
+    return chunks
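
### Reviewer note (not part of the patch)

A minimal sketch of why the `graph_merge` change matters. It is a simplified stand-in covering only the node-merge lines touched by this patch, assuming `networkx` is installed; the toy graphs `g1`/`g2` below are hypothetical:

```python
# Illustration only -- mirrors the patched node-merge logic in graphrag/index.py.
import networkx as nx


def graph_merge(g1, g2):
    """Merge g1 into a copy of g2 without mutating either input."""
    g = g2.copy()
    for n, attr in g1.nodes(data=True):
        if n not in g2.nodes():
            # patched: add missing nodes to the merged copy `g`, not to the input `g2`
            g.add_node(n, **attr)
            continue

        g.nodes[n]["weight"] += 1  # node present in both graphs: bump its weight
    return g


g1 = nx.Graph()
g1.add_node("A", weight=1)
g1.add_node("B", weight=1)  # "B" exists only in g1
g2 = nx.Graph()
g2.add_node("A", weight=1)

merged = graph_merge(g1, g2)
print(sorted(merged.nodes()))  # ['A', 'B'] -- g1-only nodes now reach the merged graph
print(list(g2.nodes()))        # ['A']      -- the caller's graph is left untouched
```

Before the fix, nodes present only in `g1` were added back to the input `g2` after `g = g2.copy()`, so they never appeared in the merged result and the caller's graph was mutated as a side effect.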