fix: Resolve KnowledgeGraph entity resolution errors (#6653) (#6691)

### Related Issue: #6653
### Environment:

Using nightly version

Elasticsearch database

### Bug Description:
When clicking the "Entity Resolution" button in KnowledgeGraph, I encountered the following errors:

graphrag/entity_resolution.py

```
merging_nodes = list(sub_connect_graph.nodes)
AttributeError: 'set' object has no attribute 'nodes'
```
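For context, a minimal standalone sketch (plain networkx, no project code): nx.connected_components() yields each component as a plain set of node keys, not a subgraph, which is why accessing .nodes fails here.

```
import networkx as nx

g = nx.Graph()
g.add_edges_from([("a", "b"), ("c", "d")])

for component in nx.connected_components(g):
    print(type(component))   # <class 'set'> -- a set of node keys, not a subgraph
    nodes = list(component)  # fine; component.nodes would raise AttributeError
    print(nodes)
```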

graphrag/general/extractor.py
```
node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr])))
TypeError: 'NoneType' object is not iterable
```
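The underlying pitfall, in a standalone sketch: list.extend() mutates the list in place and returns None, so wrapping its result in set() raises exactly this TypeError.

```
a = ["x", "y"]
b = ["y", "z"]

print(a.extend(b))        # None -- extend() mutates a and returns nothing
# sorted(set(a.extend(b))) therefore raises:
# TypeError: 'NoneType' object is not iterable

merged = sorted(set(["x", "y"] + ["y", "z"]))  # concatenation instead
print(merged)             # ['x', 'y', 'z']
```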
```
for attr in ["keywords", "source_id"]:
KeyError: 'keywords'
```
I think the "keywords" attribute exists on edges, not on nodes.
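Similarly, a standalone sketch of the KeyError: the node attribute dicts carry no "keywords" entry, so plain subscripting fails, while dict.get() with a default does not.

```
node_attrs = {"description": "an entity", "source_id": ["doc-1"]}  # no "keywords" key

# node_attrs["keywords"] would raise: KeyError: 'keywords'
keywords = node_attrs.get("keywords", [])  # returns [] instead of raising
print(keywords)  # []
```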
graphrag/utils.py
```
settings.docStoreConn.delete()  # Sync function called as async
```
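A minimal standalone sketch of the pattern the fix applies, with a hypothetical blocking_delete standing in for settings.docStoreConn.delete(): trio.to_thread.run_sync() runs the blocking call on a worker thread instead of stalling trio's event loop.

```
import trio

def blocking_delete(doc_id: str) -> None:
    # Hypothetical stand-in for a synchronous datastore call
    # such as settings.docStoreConn.delete(...).
    print(f"deleted {doc_id}")

async def main() -> None:
    async with trio.open_nursery() as nursery:
        for doc_id in ["rel-1", "rel-2"]:
            # Run the blocking call on a worker thread; passing the function
            # and its argument directly binds doc_id eagerly, per task.
            nursery.start_soon(trio.to_thread.run_sync, blocking_delete, doc_id)

trio.run(main)
```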
### Changes Made:

- Fixed the AttributeError in entity_resolution.py: nx.connected_components() yields plain sets of nodes, so iterate each component directly instead of accessing a .nodes attribute.
- Fixed the TypeError and KeyError in extractor.py by merging attributes with list concatenation instead of in-place .extend() (which returns None), and by merging only "source_id" on nodes, since "keywords" is an edge attribute.
- Corrected the async/sync mismatch by running the synchronous docStoreConn.delete() call in a worker thread via trio.to_thread.run_sync().
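As a standalone sketch of the merge idiom the fix adopts (toy attribute dicts, not the project's actual graph data):

```
edge0_attrs = {"keywords": ["graph", "rag"], "source_id": ["doc-1"]}
edge1_attrs = {"keywords": ["rag"]}  # "source_id" may be missing

for attr in ["keywords", "source_id"]:
    # Concatenate with "+" (never .extend(), which mutates in place and
    # returns None), default missing keys to [], dedupe, and sort.
    edge0_attrs[attr] = sorted(set(edge0_attrs[attr] + edge1_attrs.get(attr, [])))

print(edge0_attrs)
# {'keywords': ['graph', 'rag'], 'source_id': ['doc-1']}
```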
Yue-Lyu123 committed on 2025-03-31 22:31:35 +08:00
parent 0a42e5777e
commit ece59034f7
3 changed files with 5 additions and 6 deletions

graphrag/entity_resolution.py

```
@@ -111,7 +111,7 @@ class EntityResolution(Extractor):
         connect_graph.add_edges_from(resolution_result)
         async with trio.open_nursery() as nursery:
             for sub_connect_graph in nx.connected_components(connect_graph):
-                merging_nodes = list(sub_connect_graph.nodes)
+                merging_nodes = list(sub_connect_graph)
                 nursery.start_soon(lambda: self._merge_graph_nodes(graph, merging_nodes, change))
         # Update pagerank
```

graphrag/general/extractor.py

```
@@ -201,8 +201,7 @@ class Extractor:
         # Merge two nodes, keep "entity_name", "entity_type", "page_rank" unchanged.
         node1_attrs = graph.nodes[node1]
         node0_attrs["description"] += f"{GRAPH_FIELD_SEP}{node1_attrs['description']}"
-        for attr in ["keywords", "source_id"]:
-            node0_attrs[attr] = sorted(set(node0_attrs[attr].extend(node1_attrs[attr])))
+        node0_attrs["source_id"] = sorted(set(node0_attrs["source_id"] + node1_attrs.get("source_id", [])))
         for neighbor in graph.neighbors(node1):
             change.removed_edges.add(get_from_to(node1, neighbor))
             if neighbor not in nodes_set:
@@ -213,8 +212,8 @@ class Extractor:
             edge0_attrs = graph.get_edge_data(nodes[0], neighbor)
             edge0_attrs["weight"] += edge1_attrs["weight"]
             edge0_attrs["description"] += f"{GRAPH_FIELD_SEP}{edge1_attrs['description']}"
-            edge0_attrs["keywords"] = list(set(edge0_attrs["keywords"].extend(edge1_attrs["keywords"])))
-            edge0_attrs["source_id"] = list(set(edge0_attrs["source_id"].extend(edge1_attrs["source_id"])))
+            for attr in ["keywords", "source_id"]:
+                edge0_attrs[attr] = sorted(set(edge0_attrs[attr] + edge1_attrs.get(attr, [])))
             edge0_attrs["description"] = await self._handle_entity_relation_summary(f"({nodes[0]}, {neighbor})", edge0_attrs["description"])
             graph.add_edge(nodes[0], neighbor, **edge0_attrs)
         else:
```

graphrag/utils.py

```
@@ -439,7 +439,7 @@ async def set_graph(tenant_id: str, kb_id: str, embd_mdl, graph: nx.Graph, chang
     if change.removed_edges:
         async with trio.open_nursery() as nursery:
             for from_node, to_node in change.removed_edges:
-                nursery.start_soon(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id))
+                nursery.start_soon(lambda: trio.to_thread.run_sync(lambda: settings.docStoreConn.delete({"knowledge_graph_kwd": ["relation"], "from_entity_kwd": from_node, "to_entity_kwd": to_node}, search.index_name(tenant_id), kb_id)))
     now = trio.current_time()
     if callback:
         callback(msg=f"set_graph removed {len(change.removed_nodes)} nodes and {len(change.removed_edges)} edges from index in {now - start:.2f}s.")
```