feat: add pgvector full_text_search (#7396)

This commit is contained in:
Byeongjin Kang 2024-08-20 12:01:13 +09:00 committed by GitHub
parent 218380ba43
commit 0223fc6fd5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 21 additions and 6 deletions

View File

@@ -152,8 +152,27 @@ class PGVector(BaseVector):
return docs
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
    """Return documents whose text matches ``query`` using PostgreSQL full-text search.

    The same ``plainto_tsquery`` expression is used both to filter rows and to
    rank them, so free-form input (whitespace, apostrophes, operator
    characters) is treated as plain words instead of ``to_tsquery`` syntax.

    Args:
        query: Free-form search text.
        **kwargs: ``top_k`` (int, default 5) caps the number of rows returned.

    Returns:
        Matching documents ordered by descending ``ts_rank``; each document's
        metadata gains a ``"score"`` entry holding that rank.

    Raises:
        ValueError: If ``top_k`` is not a positive integer.
    """
    top_k = kwargs.get("top_k", 5)
    if not isinstance(top_k, int) or top_k <= 0:
        raise ValueError("top_k must be a positive integer")

    with self._get_cursor() as cur:
        # Only the table name is interpolated (identifiers cannot be bound as
        # parameters); every caller-supplied value goes through a placeholder.
        cur.execute(
            f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), plainto_tsquery(%s)) AS score
            FROM {self.table_name}
            WHERE to_tsvector(text) @@ plainto_tsquery(%s)
            ORDER BY score DESC
            LIMIT %s""",
            (query, query, top_k),
        )
        docs = []
        for metadata, text, score in cur:
            metadata["score"] = score
            docs.append(Document(page_content=text, metadata=metadata))
    return docs
def delete(self) -> None:
with self._get_cursor() as cur:

View File

@@ -21,10 +21,6 @@ class PGVectorTest(AbstractVectorTest):
),
)
def search_by_full_text(self):
    """Run a full-text query with the example text and require an empty result."""
    example_query = get_example_text()
    hits: list[Document] = self.vector.search_by_full_text(query=example_query)
    assert len(hits) == 0
def test_pgvector(setup_mock_redis):
    """Run every check defined on ``PGVectorTest``.

    ``setup_mock_redis`` is not used in the body; presumably it is a pytest
    fixture requested for its setup side effects (confirm against conftest).
    """
    suite = PGVectorTest()
    suite.run_all_tests()