diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 33ca5bc028..c9f2f35af0 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -152,8 +152,27 @@ class PGVector(BaseVector): return docs def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: - # do not support bm25 search - return [] + top_k = kwargs.get("top_k", 5) + + with self._get_cursor() as cur: + cur.execute( + f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), to_tsquery(%s)) AS score + FROM {self.table_name} + WHERE to_tsvector(text) @@ plainto_tsquery(%s) + ORDER BY score DESC + LIMIT {top_k}""", + # f"'{query}'" is required in order to account for whitespace in query + (f"'{query}'", f"'{query}'"), + ) + + docs = [] + + for record in cur: + metadata, text, score = record + metadata["score"] = score + docs.append(Document(page_content=text, metadata=metadata)) + + return docs def delete(self) -> None: with self._get_cursor() as cur: diff --git a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py b/api/tests/integration_tests/vdb/pgvector/test_pgvector.py index 851599c7ce..c5a986b747 100644 --- a/api/tests/integration_tests/vdb/pgvector/test_pgvector.py +++ b/api/tests/integration_tests/vdb/pgvector/test_pgvector.py @@ -21,10 +21,6 @@ class PGVectorTest(AbstractVectorTest): ), ) - def search_by_full_text(self): - hits_by_full_text: list[Document] = self.vector.search_by_full_text(query=get_example_text()) - assert len(hits_by_full_text) == 0 - def test_pgvector(setup_mock_redis): PGVectorTest().run_all_tests()