fix: Resolve KnowledgeGraph entity resolution errors (#6653) (#6691)

### Related Issue: #6653
### Environment:

Using nightly version

Elasticsearch database

### Bug Description:
When clicking the "Entity Resolution" button in KnowledgeGraph, I encountered the following errors:

graphrag/entity_resolution.py

```
merging_nodes = list(sub_connect_graph.nodes)
AttributeError: 'set' object has no attribute 'nodes'
```
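For context, a minimal standalone sketch (plain networkx, no project code): nx.connected_components() yields each component as a plain set of node keys, not a subgraph, which is why accessing .nodes fails here.

```
import networkx as nx

g = nx.Graph()
g.add_edges_from([("a", "b"), ("c", "d")])

for component in nx.connected_components(g):
    print(type(component))   # <class 'set'> -- a set of node keys, not a subgraph
    nodes = list(component)  # fine; component.nodes would raise AttributeError
    print(nodes)
```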

graphrag/general/extractor.py
```
node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr])))
TypeError: 'NoneType' object is not iterable
```
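The underlying pitfall, in a standalone sketch: list.extend() mutates the list in place and returns None, so wrapping its result in set() raises exactly this TypeError.

```
a = ["x", "y"]
b = ["y", "z"]

print(a.extend(b))        # None -- extend() mutates a and returns nothing
# sorted(set(a.extend(b))) therefore raises:
# TypeError: 'NoneType' object is not iterable

merged = sorted(set(["x", "y"] + ["y", "z"]))  # concatenation instead
print(merged)             # ['x', 'y', 'z']
```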
```
for attr in ["keywords", "source_id"]:
KeyError: 'keywords'
```
I think the "keywords" attribute exists on edges, not on nodes.
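Similarly, a standalone sketch of the KeyError: the node attribute dicts carry no "keywords" entry, so plain subscripting fails, while dict.get() with a default does not.

```
node_attrs = {"description": "an entity", "source_id": ["doc-1"]}  # no "keywords" key

# node_attrs["keywords"] would raise: KeyError: 'keywords'
keywords = node_attrs.get("keywords", [])  # returns [] instead of raising
print(keywords)  # []
```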
graphrag/utils.py
```
settings.docStoreConn.delete()  # Sync function called as async
```
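A minimal standalone sketch of the pattern the fix applies, with a hypothetical blocking_delete standing in for settings.docStoreConn.delete(): trio.to_thread.run_sync() runs the blocking call on a worker thread instead of stalling trio's event loop.

```
import trio

def blocking_delete(doc_id: str) -> None:
    # Hypothetical stand-in for a synchronous datastore call
    # such as settings.docStoreConn.delete(...).
    print(f"deleted {doc_id}")

async def main() -> None:
    async with trio.open_nursery() as nursery:
        for doc_id in ["rel-1", "rel-2"]:
            # Run the blocking call on a worker thread; passing the function
            # and its argument directly binds doc_id eagerly, per task.
            nursery.start_soon(trio.to_thread.run_sync, blocking_delete, doc_id)

trio.run(main)
```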
### Changes Made:

- Fixed the AttributeError in entity_resolution.py: nx.connected_components() yields plain sets of nodes, so iterate each component directly instead of accessing a .nodes attribute.
- Fixed the TypeError and KeyError in extractor.py by merging attributes with list concatenation instead of in-place .extend() (which returns None), and by merging only "source_id" on nodes, since "keywords" is an edge attribute.
- Corrected the async/sync mismatch by running the synchronous docStoreConn.delete() call in a worker thread via trio.to_thread.run_sync().
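As a standalone sketch of the merge idiom the fix adopts (toy attribute dicts, not the project's actual graph data):

```
edge0_attrs = {"keywords": ["graph", "rag"], "source_id": ["doc-1"]}
edge1_attrs = {"keywords": ["rag"]}  # "source_id" may be missing

for attr in ["keywords", "source_id"]:
    # Concatenate with "+" (never .extend(), which mutates in place and
    # returns None), default missing keys to [], dedupe, and sort.
    edge0_attrs[attr] = sorted(set(edge0_attrs[attr] + edge1_attrs.get(attr, [])))

print(edge0_attrs)
# {'keywords': ['graph', 'rag'], 'source_id': ['doc-1']}
```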
Yue-Lyu123 committed on 2025-03-31 22:31:35 +08:00
parent 0a42e5777e
commit ece59034f7
3 changed files with 5 additions and 6 deletions

graphrag/entity_resolution.py

```
@@ -111,7 +111,7 @@ class EntityResolution(Extractor):
         connect_graph.add_edges_from(resolution_result)
         async with trio.open_nursery() as nursery:
             for sub_connect_graph in nx.connected_components(connect_graph):
-                merging_nodes = list(sub_connect_graph.nodes)
+                merging_nodes = list(sub_connect_graph)
                 nursery.start_soon(lambda: self._merge_graph_nodes(graph, merging_nodes, change))
         # Update pagerank
```

graphrag/general/extractor.py

```
@@ -201,8 +201,7 @@ class Extractor:
         # Merge two nodes, keep "entity_name", "entity_type", "page_rank" unchanged.
         node1_attrs = graph.nodes[node1]
         node0_attrs["description"] += f"{GRAPH_FIELD_SEP}{node1_attrs['description']}"
-        for attr in ["keywords", "source_id"]:
-            node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr])))
+        node0_attrs["source_id"] = sorted(set(node0_attrs["source_id"] + node1_attrs.get("source_id", [])))
         for neighbor in graph.neighbors(node1):
             change.removed_edges.add(get_from_to(node1, neighbor))
             if neighbor not in nodes_set:
@@ -213,8 +212,8 @@ class Extractor:
             edge0_attrs = graph.get_edge_data(nodes[0], neighbor)
             edge0_attrs["weight"] += edge1_attrs["weight"]
             edge0_attrs["description"] += f"{GRAPH_FIELD_SEP}{edge1_attrs['description']}"
-            edge0_attrs["keywords"] = list(set(edge0_attrs["keywords"].extend(edge1_attrs["keywords"])))
-            edge0_attrs["source_id"] = list(set(edge0_attrs["source_id"].extend(edge1_attrs["source_id"])))
+            for attr in ["keywords", "source_id"]:
+                edge0_attrs[attr] = sorted(set(edge0_attrs[attr] + edge1_attrs.get(attr, [])))
             edge0_attrs["description"] = await self._handle_entity_relation_summary(f"({nodes[0]}, {neighbor})", edge0_attrs["description"])
             graph.add_edge(nodes[0], neighbor, **edge0_attrs)
         else:
```

graphrag/utils.py

```
@@ -439,7 +439,7 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
     if change.removed_edges:
         async with trio.open_nursery() as nursery:
             for from_node, to_node in change.removed_edges:
-                nursery.start_soon(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id))
+                nursery.start_soon(lambda: trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id)))
     now = trio.current_time()
     if callback:
         callback(msg=f"set_graph removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges from index in {now - start:.2f}s.")
```