From 920b2c2b40a017cb5c25fae9def16b101896d5c3 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Tue, 27 Feb 2024 17:30:52 +0800 Subject: [PATCH] Fix/hit test tsne issue (#2581) Co-authored-by: jyong --- api/celerybeat-schedule.db | Bin 16384 -> 0 bytes api/core/features/annotation_reply.py | 2 +- api/core/rag/datasource/retrieval_service.py | 4 ++-- api/services/hit_testing_service.py | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) delete mode 100644 api/celerybeat-schedule.db diff --git a/api/celerybeat-schedule.db b/api/celerybeat-schedule.db deleted file mode 100644 index b8c01de27bfe7ea04f1dd868cec4935ef336f2b5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeI%J#W)M7zglk>f!`QTojc7@c|m78dNo6>rydA=}>~icDmRP*lOxV@fkIhEM?+F zx~ENo1C0EEPX#K4Zk%h}inp%k`MMgNm7_ax8R_xN|`DeS_kV2srmv)?kd zVnTMAG0O~jXZ12L`QnGAa$GZ`oyR9}_Q8yK%je{M;jLbjy6|POAOs))0SG_<0uX=z z1Rwwb2teT62-Mj(_lx`4{p7xP-?*>cL367)Xr7z$Q78l;009U<00Izz00bZafio4D z*(VRmKSDFTrmp!T5;3R!Aq7DcKjgmfL*h~-ds|{FA(gYPz8m9>b+|(oz zlF3h^(Edd@H?RIgm^Z6Ln3vLFqkP=;mUX?Y!&dQh8(}+K?Xmv-+WeBQRvRb$J&FTf zY(P5JdAXSdB=Qjg4Oi6}AWu2CL*wcPbKyxZF2{1Hu(=pg2NW2r$3a74k(-tpwo znZ77k90Cx400bZa0SG_<0uX=z1Rwx`g$R88js^h;KmY;|fB*y_009U<00Izz!2d4r E0}denXaE2J diff --git a/api/core/features/annotation_reply.py b/api/core/features/annotation_reply.py index e1b64cf73f..fd516e465f 100644 --- a/api/core/features/annotation_reply.py +++ b/api/core/features/annotation_reply.py @@ -59,7 +59,7 @@ class AnnotationReplyFeature: documents = vector.search_by_vector( query=query, - k=1, + top_k=1, score_threshold=score_threshold, filter={ 'group_id': [dataset.id] diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index e295e58950..0f9c753056 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -101,7 +101,7 @@ class RetrievalService: documents = keyword.search( query, - k=top_k + top_k=top_k ) all_documents.extend(documents) @@ -121,7 +121,7 @@ class RetrievalService: documents = vector.search_by_vector( query, search_type='similarity_score_threshold', - k=top_k, + top_k=top_k, score_threshold=score_threshold, filter={ 'group_id': [dataset.id] diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 568974b74f..6d5a0537d3 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -133,8 +133,9 @@ class HitTestingService: if embedding_length <= 1: return [{'x': 0, 'y': 0}] - concatenate_data = np.array(embeddings).reshape(embedding_length, -1) - # concatenate_data = np.concatenate(embeddings) + noise = np.random.normal(0, 1e-4, np.array(embeddings).shape) + concatenate_data = np.array(embeddings) + noise + concatenate_data = concatenate_data.reshape(embedding_length, -1) perplexity = embedding_length / 2 + 1 if perplexity >= embedding_length: