diff --git a/api/apps/sdk/chat.py b/api/apps/sdk/chat.py
index 99def010f..334cb9f40 100644
--- a/api/apps/sdk/chat.py
+++ b/api/apps/sdk/chat.py
@@ -30,9 +30,9 @@ from api.utils.api_utils import get_result
 @token_required
 def create(tenant_id):
     req=request.json
-    ids= req.get("datasets")
+    ids = req.get("dataset_ids")
     if not ids:
-        return get_error_data_result(retmsg="`datasets` is required")
+        return get_error_data_result(retmsg="`dataset_ids` is required")
     for kb_id in ids:
         kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
         if not kbs:
@@ -138,7 +138,7 @@ def create(tenant_id):
     res["llm"] = res.pop("llm_setting")
     res["llm"]["model_name"] = res.pop("llm_id")
     del res["kb_ids"]
-    res["datasets"] = req["datasets"]
+    res["dataset_ids"] = req["dataset_ids"]
     res["avatar"] = res.pop("icon")
     return get_result(data=res)

@@ -148,8 +148,8 @@ def update(tenant_id,chat_id):
     if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
         return get_error_data_result(retmsg='You do not own the chat')
     req =request.json
-    ids = req.get("datasets")
-    if "datasets" in req:
+    ids = req.get("dataset_ids")
+    if "dataset_ids" in req:
         if not ids:
-            return get_error_data_result("`datasets` can't be empty")
+            return get_error_data_result("`dataset_ids` can't be empty")
         if ids:
@@ -214,8 +214,8 @@ def update(tenant_id,chat_id):
     # avatar
     if "avatar" in req:
         req["icon"] = req.pop("avatar")
-    if "datasets" in req:
-        req.pop("datasets")
+    if "dataset_ids" in req:
+        req.pop("dataset_ids")
     if not DialogService.update_by_id(chat_id, req):
         return get_error_data_result(retmsg="Chat not found!")
     return get_result()
diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index ab2d87b60..228ae95dd 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -550,33 +550,32 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
 @token_required
 def retrieval_test(tenant_id):
     req = request.json
-    if not req.get("datasets"):
-        return get_error_data_result("`datasets` is required.")
-    kb_ids = req["datasets"]
+    if not req.get("dataset_ids"):
+        return get_error_data_result("`dataset_ids` is required.")
+    kb_ids = req["dataset_ids"]
     if not isinstance(kb_ids,list):
-        return get_error_data_result("`datasets` should be a list")
+        return get_error_data_result("`dataset_ids` should be a list")
     kbs = KnowledgebaseService.get_by_ids(kb_ids)
-    embd_nms = list(set([kb.embd_id for kb in kbs]))
-    if len(embd_nms) != 1:
-        return get_result(
-            retmsg='Knowledge bases use different embedding models or does not exist."',
-            retcode=RetCode.AUTHENTICATION_ERROR)
-    if isinstance(kb_ids, str): kb_ids = [kb_ids]
     for id in kb_ids:
         if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
             return get_error_data_result(f"You don't own the dataset {id}.")
+    embd_nms = list(set([kb.embd_id for kb in kbs]))
+    if len(embd_nms) != 1:
+        return get_result(
+            retmsg='Datasets use different embedding models.',
+            retcode=RetCode.AUTHENTICATION_ERROR)
     if "question" not in req:
         return get_error_data_result("`question` is required.")
     page = int(req.get("offset", 1))
     size = int(req.get("limit", 1024))
     question = req["question"]
-    doc_ids = req.get("documents", [])
-    if not isinstance(req.get("documents"),list):
-        return get_error_data_result("`documents` should be a list")
+    doc_ids = req.get("document_ids", [])
+    if not isinstance(doc_ids,list):
+        return get_error_data_result("`document_ids` should be a list")
     doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
     for doc_id in doc_ids:
         if doc_id not in doc_ids_list:
-            return get_error_data_result(f"You don't own the document {doc_id}")
+            return get_error_data_result(f"The datasets don't own the document {doc_id}")
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
     vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
     top = int(req.get("top_k", 1024))
diff --git a/sdk/python/ragflow/modules/chat.py b/sdk/python/ragflow/modules/chat.py
index 596ba7b5c..ecfc247c0 100644
--- a/sdk/python/ragflow/modules/chat.py
+++ b/sdk/python/ragflow/modules/chat.py
@@ -9,7 +9,7 @@ class Chat(Base):
         self.id = ""
         self.name = "assistant"
         self.avatar = "path/to/avatar"
-        self.datasets = ["kb1"]
+        self.dataset_ids = ["kb1"]
         self.llm = Chat.LLM(rag, {})
         self.prompt = Chat.Prompt(rag, {})
         super().__init__(rag, res_dict)
diff --git a/sdk/python/ragflow/ragflow.py b/sdk/python/ragflow/ragflow.py
index 4c9e5a177..df37df68e 100644
--- a/sdk/python/ragflow/ragflow.py
+++ b/sdk/python/ragflow/ragflow.py
@@ -64,8 +64,8 @@ class RAGFlow:
             return DataSet(self, res["data"])
         raise Exception(res["message"])

-    def delete_datasets(self, ids: List[str] = None, names: List[str] = None):
-        res = self.delete("/dataset",{"ids": ids, "names": names})
+    def delete_datasets(self, ids: List[str]):
+        res = self.delete("/dataset",{"ids": ids})
         res=res.json()
         if res.get("code") != 0:
             raise Exception(res["message"])
@@ -89,11 +89,11 @@ class RAGFlow:
             return result_list
         raise Exception(res["message"])

-    def create_chat(self, name: str, avatar: str = "", datasets: List[DataSet] = [],
+    def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = [],
                     llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
         dataset_list = []
-        for dataset in datasets:
-            dataset_list.append(dataset.id)
+        for id in dataset_ids:
+            dataset_list.append(id)
         if llm is None:
             llm = Chat.LLM(self, {"model_name": None,
@@ -126,7 +126,7 @@ class RAGFlow:
         temp_dict = {"name": name,
                      "avatar": avatar,
-                     "datasets": dataset_list,
+                     "dataset_ids": dataset_list,
                      "llm": llm.to_json(),
                      "prompt": prompt.to_json()}
         res = self.post("/chat", temp_dict)
@@ -154,7 +154,9 @@ class RAGFlow:
         raise Exception(res["message"])

-    def retrieve(self, datasets,documents,question="", offset=1, limit=1024, similarity_threshold=0.2,vector_similarity_weight=0.3,top_k=1024,rerank_id:str=None,keyword:bool=False,):
+    def retrieve(self, dataset_ids, document_ids=None, question="", offset=1, limit=1024, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id: str = None, keyword: bool = False):
+        if document_ids is None:
+            document_ids = []
         data_json ={
             "offset": offset,
             "limit": limit,
@@ -164,10 +166,9 @@ class RAGFlow:
             "rerank_id": rerank_id,
             "keyword": keyword,
             "question": question,
-            "datasets": datasets,
-            "documents": documents
+            "dataset_ids": dataset_ids,
+            "document_ids": document_ids
         }
-        # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
         res = self.post(f'/retrieval',json=data_json)
         res = res.json()
diff --git a/sdk/python/test/t_chat.py b/sdk/python/test/t_chat.py
index 0e34b818e..6ee321ad6 100644
--- a/sdk/python/test/t_chat.py
+++ b/sdk/python/test/t_chat.py
@@ -1,5 +1,4 @@
 from ragflow import RAGFlow, Chat
-import time
 HOST_ADDRESS = 'http://127.0.0.1:9380'

 def test_create_chat_with_name(get_api_key_fixture):
@@ -12,13 +11,10 @@ def test_create_chat_with_name(get_api_key_fixture):
     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    rag.create_chat("test_create", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    rag.create_chat("test_create", dataset_ids=[kb.id])


 def test_update_chat_with_name(get_api_key_fixture):
@@ -31,13 +27,10 @@ def test_update_chat_with_name(get_api_key_fixture):
     document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    chat = rag.create_chat("test_update", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    chat = rag.create_chat("test_update", dataset_ids=[kb.id])
     chat.update({"name": "new_chat"})


@@ -51,17 +44,27 @@ def test_delete_chats_with_success(get_api_key_fixture):
     document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    chat = rag.create_chat("test_delete", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    chat = rag.create_chat("test_delete", dataset_ids=[kb.id])
     rag.delete_chats(ids=[chat.id])

+def test_list_chats_with_success(get_api_key_fixture):
     API_KEY = get_api_key_fixture
     rag = RAGFlow(API_KEY, HOST_ADDRESS)
+    kb = rag.create_dataset(name="test_list_chats")
+    displayed_name = "ragflow.txt"
+    with open("./ragflow.txt", "rb") as file:
+        blob = file.read()
+    document = {"displayed_name": displayed_name, "blob": blob}
+    documents = []
+    documents.append(document)
+    docs = kb.upload_documents(documents)
+    for doc in docs:
+        doc.add_chunk("This is a test to add chunk")
+    rag.create_chat("test_list_1", dataset_ids=[kb.id])
+    rag.create_chat("test_list_2", dataset_ids=[kb.id])
     rag.list_chats()
diff --git a/sdk/python/test/t_session.py b/sdk/python/test/t_session.py
index 58370344f..9dbc3922c 100644
--- a/sdk/python/test/t_session.py
+++ b/sdk/python/test/t_session.py
@@ -10,16 +10,13 @@ def test_create_session_with_success(get_api_key_fixture):
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt", "rb") as file:
         blob = file.read()
     document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_create_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat(name="test_create_session", dataset_ids=[kb.id])
     assistant.create_session()


@@ -30,16 +27,13 @@ def test_create_conversation_with_success(get_api_key_fixture):
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt","rb") as file:
         blob = file.read()
-    document = {"displayed_name":displayed_name,"blob":blob}
+    document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs= kb.upload_documents(documents)
+    docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_create_conversation", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat(name="test_create_conversation", dataset_ids=[kb.id])
     session = assistant.create_session()
     question = "What is AI"
     for ans in session.ask(question, stream=True):
@@ -57,13 +51,10 @@ def test_delete_sessions_with_success(get_api_key_fixture):
     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_delete_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat(name="test_delete_session", dataset_ids=[kb.id])
     session = assistant.create_session()
     assistant.delete_sessions(ids=[session.id])

@@ -74,16 +65,13 @@ def test_update_session_with_name(get_api_key_fixture):
     displayed_name = "ragflow.txt"
     with open("./ragflow.txt","rb") as file:
         blob = file.read()
-    document = {"displayed_name":displayed_name,"blob":blob}
+    document = {"displayed_name": displayed_name, "blob": blob}
     documents = []
     documents.append(document)
-    doc_ids = []
-    docs= kb.upload_documents(documents)
+    docs = kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_update_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat(name="test_update_session", dataset_ids=[kb.id])
     session = assistant.create_session(name="old session")
     session.update({"name": "new session"})

@@ -98,13 +86,10 @@ def test_list_sessions_with_success(get_api_key_fixture):
     document = {"displayed_name":displayed_name,"blob":blob}
     documents = []
     documents.append(document)
-    doc_ids = []
     docs= kb.upload_documents(documents)
     for doc in docs:
-        doc_ids.append(doc.id)
-    kb.async_parse_documents(doc_ids)
-    time.sleep(60)
-    assistant = rag.create_chat(name="test_list_session", datasets=[kb])
+        doc.add_chunk("This is a test to add chunk")
+    assistant = rag.create_chat(name="test_list_session", dataset_ids=[kb.id])
     assistant.create_session("test_1")
     assistant.create_session("test_2")
     assistant.list_sessions()
\ No newline at end of file