From 9e7d052c8d90efb88db4d8acaca1281b3a90a1fb Mon Sep 17 00:00:00 2001 From: alkscr <72345450+alkscr@users.noreply.github.com> Date: Thu, 17 Apr 2025 16:15:21 +0800 Subject: [PATCH] Fix: knowledge graph resolution with infinity raise error tokenizing in specific situations (#7048) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? When running graph resolution with infinity, if single quotation marks appear in the names of the entities to be deleted, a sqlglot tokenizing error might occur after calling infinity. For example: ``` INFINITY delete table ragflow_xxx, filter knowledge_graph_kwd IN ('entity') AND entity_kwd IN ('86 IMAGES FROM PREVIOUS CONTESTS', 'ADAM OPTIMIZATION', 'BACKGROUND'ESTIMATION') ``` may raise the error ``` Error tokenizing 'TS', 'ADAM OPTIMIZATION', 'BACKGROUND'ESTIMATION'' ``` and cause the document parsing to fail. Replacing each single quotation mark with two single quotation marks lets sqlglot tokenize the entity name correctly. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/utils/infinity_conn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index c97621773..0abc1903d 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -68,6 +68,7 @@ def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | N inCond = list() for item in v: if isinstance(item, str): + item = item.replace("'","''") inCond.append(f"'{item}'") else: inCond.append(str(item))