diff --git a/rag/utils/opensearch_coon.py b/rag/utils/opensearch_coon.py index 8bbde7e07..4a8fd0889 100644 --- a/rag/utils/opensearch_coon.py +++ b/rag/utils/opensearch_coon.py @@ -217,7 +217,7 @@ class OSConnection(DocStoreConnection): if bqry: s = s.query(bqry) for field in highlightFields: - s = s.highlight(field) + s = s.highlight(field,force_source=True,no_match_size=30,require_field_match=False) if orderBy: orders = list() @@ -269,7 +269,7 @@ class OSConnection(DocStoreConnection): for i in range(ATTEMPT_TIME): try: res = self.os.get(index=(indexName), - id=chunkId, source=True, ) + id=chunkId, _source=True, ) if str(res.get("timed_out", "")).lower() == "true": raise Exception("Es Timeout.") chunk = res["_source"] @@ -329,7 +329,7 @@ class OSConnection(DocStoreConnection): chunkId = condition["id"] for i in range(ATTEMPT_TIME): try: - self.os.update(index=indexName, id=chunkId, doc=doc) + self.os.update(index=indexName, id=chunkId, body=doc) return True except Exception as e: logger.exception( @@ -411,7 +411,10 @@ class OSConnection(DocStoreConnection): chunk_ids = condition["id"] if not isinstance(chunk_ids, list): chunk_ids = [chunk_ids] - qry = Q("ids", values=chunk_ids) + if not chunk_ids: # when chunk_ids is empty, delete all + qry = Q("match_all") + else: + qry = Q("ids", values=chunk_ids) else: qry = Q("bool") for k, v in condition.items(): diff --git a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_list_chunks.py b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_list_chunks.py index 35b6e416c..f866d3f09 100644 --- a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_list_chunks.py +++ b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_list_chunks.py @@ -69,7 +69,7 @@ class TestChunksList: [ ({"page_size": None}, 0, 5, ""), pytest.param({"page_size": 0}, 0, 5, "", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity does not support page_size=0")), - pytest.param({"page_size": 0}, 100, 0, "3013", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="Infinity does not support page_size=0")), + pytest.param({"page_size": 0}, 100, 0, "3013", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch", "elasticsearch"], reason="Infinity does not support page_size=0")), ({"page_size": 1}, 0, 1, ""), ({"page_size": 6}, 0, 5, ""), ({"page_size": "1"}, 0, 1, ""), diff --git a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py index df6731b16..c4fd4b626 100644 --- a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py +++ b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_retrieval_chunks.py @@ -185,28 +185,28 @@ class TestChunksRetrieval: 0, 4, "", - marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity"), + marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in ["infinity", "opensearch"], reason="Infinity"), ), pytest.param( {"top_k": 1}, 0, 1, "", - marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="elasticsearch"), + marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch", "elasticsearch"], reason="elasticsearch"), ), pytest.param( {"top_k": -1}, 100, 4, "must be greater than 0", - marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="Infinity"), + marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in ["infinity", "opensearch"], reason="Infinity"), ), pytest.param( {"top_k": -1}, 100, 4, "3014", - marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="elasticsearch"), + marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch", "elasticsearch"], reason="elasticsearch"), ), pytest.param( {"top_k": "a"}, diff --git a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_update_chunk.py b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_update_chunk.py index 710829ac1..b364f81bd 100644 --- a/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_update_chunk.py +++ b/sdk/python/test/test_http_api/test_chunk_management_within_dataset/test_update_chunk.py @@ -146,7 +146,7 @@ class TestUpdatedChunk: [ ("", 100, ""), pytest.param("invalid_dataset_id", 102, "You don't own the dataset invalid_dataset_id.", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") == "infinity", reason="infinity")), - pytest.param("invalid_dataset_id", 102, "Can't find this chunk", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "elasticsearch"], reason="elasticsearch")), + pytest.param("invalid_dataset_id", 102, "Can't find this chunk", marks=pytest.mark.skipif(os.getenv("DOC_ENGINE") in [None, "opensearch","elasticsearch"], reason="elasticsearch")), ], ) def test_invalid_dataset_id(self, get_http_api_auth, add_chunks, dataset_id, expected_code, expected_message):