From ab9fcbdfb9617aa39a53d9815bcbbd8e9670417e Mon Sep 17 00:00:00 2001
From: Jyong <76649700+JohnJyong@users.noreply.github.com>
Date: Sun, 7 Apr 2024 15:20:58 +0800
Subject: [PATCH] Duplicate embedding cache check (#3134)

Co-authored-by: jyong
---
 api/core/embedding/cached_embedding.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/api/core/embedding/cached_embedding.py b/api/core/embedding/cached_embedding.py
index 11dfe8dc15..4156368e56 100644
--- a/api/core/embedding/cached_embedding.py
+++ b/api/core/embedding/cached_embedding.py
@@ -60,14 +60,17 @@ class CacheEmbedding(Embeddings):
                             db.session.rollback()
                         except Exception as e:
                             logging.exception('Failed transform embedding: ', e)
+                cache_embeddings = []
                 for i, embedding in zip(embedding_queue_indices, embedding_queue_embeddings):
                     text_embeddings[i] = embedding
                     hash = helper.generate_text_hash(texts[i])
-                    embedding_cache = Embedding(model_name=self._model_instance.model,
-                                                hash=hash,
-                                                provider_name=self._model_instance.provider)
-                    embedding_cache.set_embedding(embedding)
-                    db.session.add(embedding_cache)
+                    if hash not in cache_embeddings:
+                        embedding_cache = Embedding(model_name=self._model_instance.model,
+                                                    hash=hash,
+                                                    provider_name=self._model_instance.provider)
+                        embedding_cache.set_embedding(embedding)
+                        db.session.add(embedding_cache)
+                        cache_embeddings.append(hash)
                 db.session.commit()
             except Exception as ex:
                 db.session.rollback()