diff --git a/api/celerybeat-schedule.db b/api/celerybeat-schedule.db deleted file mode 100644 index b8c01de27b..0000000000 Binary files a/api/celerybeat-schedule.db and /dev/null differ diff --git a/api/core/features/annotation_reply.py b/api/core/features/annotation_reply.py index e1b64cf73f..fd516e465f 100644 --- a/api/core/features/annotation_reply.py +++ b/api/core/features/annotation_reply.py @@ -59,7 +59,7 @@ class AnnotationReplyFeature: documents = vector.search_by_vector( query=query, - k=1, + top_k=1, score_threshold=score_threshold, filter={ 'group_id': [dataset.id] diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index e295e58950..0f9c753056 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -101,7 +101,7 @@ class RetrievalService: documents = keyword.search( query, - k=top_k + top_k=top_k ) all_documents.extend(documents) @@ -121,7 +121,7 @@ class RetrievalService: documents = vector.search_by_vector( query, search_type='similarity_score_threshold', - k=top_k, + top_k=top_k, score_threshold=score_threshold, filter={ 'group_id': [dataset.id] diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 568974b74f..6d5a0537d3 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -133,8 +133,9 @@ class HitTestingService: if embedding_length <= 1: return [{'x': 0, 'y': 0}] - concatenate_data = np.array(embeddings).reshape(embedding_length, -1) - # concatenate_data = np.concatenate(embeddings) + noise = np.random.normal(0, 1e-4, np.array(embeddings).shape) + concatenate_data = np.array(embeddings) + noise + concatenate_data = concatenate_data.reshape(embedding_length, -1) perplexity = embedding_length / 2 + 1 if perplexity >= embedding_length: