From 47a64610ca14ddd4de971dafdb3b64759294f34c Mon Sep 17 00:00:00 2001 From: liuzhenghua <1090179900@qq.com> Date: Tue, 11 Feb 2025 09:58:31 +0800 Subject: [PATCH] Fix the issue of repeated escaping of quotes in hit test (#13477) --- Review note: escape_query_for_search removes exactly the one enclosing quote pair that json.dumps adds by slicing with [1:-1]; str.strip('"') would also consume the quote of an escaped trailing \" and leave a dangling backslash (e.g. for the query a"). api/core/rag/datasource/retrieval_service.py | 5 +++-- api/services/hit_testing_service.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 3a8200bc7b..927df0efc4 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -1,3 +1,4 @@ +import json import threading from typing import Optional @@ -171,7 +172,7 @@ class RetrievalService: vector = Vector(dataset=dataset) documents = vector.search_by_vector( - cls.escape_query_for_search(query), + query, search_type="similarity_score_threshold", top_k=top_k, score_threshold=score_threshold, @@ -250,7 +251,7 @@ class RetrievalService: @staticmethod def escape_query_for_search(query: str) -> str: - return query.replace('"', '\\"') + return json.dumps(query)[1:-1] @staticmethod def format_retrieval_documents(documents: list[Document]) -> list[RetrievalSegments]: diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index e9176fc1c6..f8c1c1d297 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -47,7 +47,7 @@ class HitTestingService: all_documents = RetrievalService.retrieve( retrieval_method=retrieval_model.get("search_method", "semantic_search"), dataset_id=dataset.id, - query=cls.escape_query_for_search(query), + query=query, top_k=retrieval_model.get("top_k", 2), score_threshold=retrieval_model.get("score_threshold", 0.0) if retrieval_model["score_threshold_enabled"]