Fix the issue of repeated escaping of quotes in hit test (#13477)

This commit is contained in:
liuzhenghua 2025-02-11 09:58:31 +08:00 committed by GitHub
parent f0a845f0f9
commit 47a64610ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 3 deletions

View File

@@ -1,3 +1,4 @@
import json
import threading import threading
from typing import Optional from typing import Optional
@@ -171,7 +172,7 @@ class RetrievalService:
vector = Vector(dataset=dataset) vector = Vector(dataset=dataset)
documents = vector.search_by_vector( documents = vector.search_by_vector(
cls.escape_query_for_search(query), query,
search_type="similarity_score_threshold", search_type="similarity_score_threshold",
top_k=top_k, top_k=top_k,
score_threshold=score_threshold, score_threshold=score_threshold,
@@ -250,7 +251,7 @@ class RetrievalService:
@staticmethod @staticmethod
def escape_query_for_search(query: str) -> str: def escape_query_for_search(query: str) -> str:
return query.replace('"', '\\"') return json.dumps(query).strip('"')
@staticmethod @staticmethod
def format_retrieval_documents(documents: list[Document]) -> list[RetrievalSegments]: def format_retrieval_documents(documents: list[Document]) -> list[RetrievalSegments]:

View File

@@ -47,7 +47,7 @@ class HitTestingService:
all_documents = RetrievalService.retrieve( all_documents = RetrievalService.retrieve(
retrieval_method=retrieval_model.get("search_method", "semantic_search"), retrieval_method=retrieval_model.get("search_method", "semantic_search"),
dataset_id=dataset.id, dataset_id=dataset.id,
query=cls.escape_query_for_search(query), query=query,
top_k=retrieval_model.get("top_k", 2), top_k=retrieval_model.get("top_k", 2),
score_threshold=retrieval_model.get("score_threshold", 0.0) score_threshold=retrieval_model.get("score_threshold", 0.0)
if retrieval_model["score_threshold_enabled"] if retrieval_model["score_threshold_enabled"]