Refactor Chunk API (#2855)
### What problem does this PR solve?

Refactor Chunk API #2846

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
parent b9fa00f341
commit dab92ac1e8
@ -119,13 +119,11 @@ def update_doc(tenant_id, dataset_id, document_id):
|
||||
if informs:
|
||||
e, file = FileService.get_by_id(informs[0].file_id)
|
||||
FileService.update_by_id(file.id, {"name": req["name"]})
|
||||
if "parser_config" in req:
|
||||
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
||||
if "parser_method" in req:
|
||||
if doc.parser_id.lower() == req["parser_method"].lower():
|
||||
if "parser_config" in req:
|
||||
if req["parser_config"] == doc.parser_config:
|
||||
return get_result(retcode=RetCode.SUCCESS)
|
||||
else:
|
||||
return get_result(retcode=RetCode.SUCCESS)
|
||||
return get_result()
|
||||
|
||||
if doc.type == FileType.VISUAL or re.search(
|
||||
r"\.(ppt|pptx|pages)$", doc.name):
|
||||
@ -146,8 +144,6 @@ def update_doc(tenant_id, dataset_id, document_id):
|
||||
return get_error_data_result(retmsg="Tenant not found!")
|
||||
ELASTICSEARCH.deleteByQuery(
|
||||
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
|
||||
if "parser_config" in req:
|
||||
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
||||
|
||||
return get_result()
|
||||
|
||||
@ -258,6 +254,8 @@ def parse(tenant_id,dataset_id):
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
||||
req = request.json
|
||||
if not req.get("document_ids"):
|
||||
return get_error_data_result("`document_ids` is required")
|
||||
for id in req["document_ids"]:
|
||||
if not DocumentService.query(id=id,kb_id=dataset_id):
|
||||
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
||||
@ -283,9 +281,14 @@ def stop_parsing(tenant_id,dataset_id):
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
||||
req = request.json
|
||||
if not req.get("document_ids"):
|
||||
return get_error_data_result("`document_ids` is required")
|
||||
for id in req["document_ids"]:
|
||||
if not DocumentService.query(id=id,kb_id=dataset_id):
|
||||
doc = DocumentService.query(id=id, kb_id=dataset_id)
|
||||
if not doc:
|
||||
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
||||
if doc[0].progress == 100.0 or doc[0].progress == 0.0:
|
||||
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
||||
info = {"run": "2", "progress": 0}
|
||||
DocumentService.update_by_id(id, info)
|
||||
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
||||
@ -297,7 +300,7 @@ def stop_parsing(tenant_id,dataset_id):
|
||||
|
||||
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk', methods=['GET'])
|
||||
@token_required
|
||||
def list_chunk(tenant_id,dataset_id,document_id):
|
||||
def list_chunks(tenant_id,dataset_id,document_id):
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
||||
doc=DocumentService.query(id=document_id, kb_id=dataset_id)
|
||||
@ -309,57 +312,58 @@ def list_chunk(tenant_id,dataset_id,document_id):
|
||||
page = int(req.get("offset", 1))
|
||||
size = int(req.get("limit", 30))
|
||||
question = req.get("keywords", "")
|
||||
try:
|
||||
query = {
|
||||
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
|
||||
query = {
|
||||
"doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
|
||||
}
|
||||
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
||||
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
||||
origin_chunks = []
|
||||
sign = 0
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
"chunk_id": id,
|
||||
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
||||
id].get(
|
||||
"content_with_weight", ""),
|
||||
"doc_id": sres.field[id]["doc_id"],
|
||||
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
||||
"important_kwd": sres.field[id].get("important_kwd", []),
|
||||
"img_id": sres.field[id].get("img_id", ""),
|
||||
"available_int": sres.field[id].get("available_int", 1),
|
||||
"positions": sres.field[id].get("position_int", "").split("\t")
|
||||
}
|
||||
if "available_int" in req:
|
||||
query["available_int"] = int(req["available_int"])
|
||||
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
||||
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
||||
if len(d["positions"]) % 5 == 0:
|
||||
poss = []
|
||||
for i in range(0, len(d["positions"]), 5):
|
||||
poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
|
||||
float(d["positions"][i + 3]), float(d["positions"][i + 4])])
|
||||
d["positions"] = poss
|
||||
|
||||
origin_chunks = []
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
"chunk_id": id,
|
||||
"content_with_weight": rmSpace(sres.highlight[id]) if question and id in sres.highlight else sres.field[
|
||||
id].get(
|
||||
"content_with_weight", ""),
|
||||
"doc_id": sres.field[id]["doc_id"],
|
||||
"docnm_kwd": sres.field[id]["docnm_kwd"],
|
||||
"important_kwd": sres.field[id].get("important_kwd", []),
|
||||
"img_id": sres.field[id].get("img_id", ""),
|
||||
"available_int": sres.field[id].get("available_int", 1),
|
||||
"positions": sres.field[id].get("position_int", "").split("\t")
|
||||
}
|
||||
if len(d["positions"]) % 5 == 0:
|
||||
poss = []
|
||||
for i in range(0, len(d["positions"]), 5):
|
||||
poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
|
||||
float(d["positions"][i + 3]), float(d["positions"][i + 4])])
|
||||
d["positions"] = poss
|
||||
origin_chunks.append(d)
|
||||
if req.get("id"):
|
||||
if req.get("id") == id:
|
||||
origin_chunks.clear()
|
||||
origin_chunks.append(d)
|
||||
sign = 1
|
||||
break
|
||||
if req.get("id"):
|
||||
if sign == 0:
|
||||
return get_error_data_result(f"Can't find this chunk {req.get('id')}")
|
||||
for chunk in origin_chunks:
|
||||
key_mapping = {
|
||||
"chunk_id": "id",
|
||||
"content_with_weight": "content",
|
||||
"doc_id": "document_id",
|
||||
"important_kwd": "important_keywords",
|
||||
"img_id": "image_id",
|
||||
}
|
||||
renamed_chunk = {}
|
||||
for key, value in chunk.items():
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_chunk[new_key] = value
|
||||
res["chunks"].append(renamed_chunk)
|
||||
return get_result(data=res)
|
||||
|
||||
origin_chunks.append(d)
|
||||
##rename keys
|
||||
for chunk in origin_chunks:
|
||||
key_mapping = {
|
||||
"chunk_id": "id",
|
||||
"content_with_weight": "content",
|
||||
"doc_id": "document_id",
|
||||
"important_kwd": "important_keywords",
|
||||
"img_id": "image_id",
|
||||
}
|
||||
renamed_chunk = {}
|
||||
for key, value in chunk.items():
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_chunk[new_key] = value
|
||||
res["chunks"].append(renamed_chunk)
|
||||
return get_result(data=res)
|
||||
except Exception as e:
|
||||
if str(e).find("not_found") > 0:
|
||||
return get_result(retmsg=f'No chunk found!',
|
||||
retcode=RetCode.DATA_ERROR)
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk', methods=['POST'])
|
||||
@ -374,6 +378,9 @@ def create(tenant_id,dataset_id,document_id):
|
||||
req = request.json
|
||||
if not req.get("content"):
|
||||
return get_error_data_result(retmsg="`content` is required")
|
||||
if "important_keywords" in req:
|
||||
if type(req["important_keywords"]) != list:
|
||||
return get_error_data_result("`important_keywords` is required to be a list")
|
||||
md5 = hashlib.md5()
|
||||
md5.update((req["content"] + document_id).encode("utf-8"))
|
||||
|
||||
@ -381,8 +388,8 @@ def create(tenant_id,dataset_id,document_id):
|
||||
d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content"]),
|
||||
"content_with_weight": req["content"]}
|
||||
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
||||
d["important_kwd"] = req.get("important_kwd", [])
|
||||
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
|
||||
d["important_kwd"] = req.get("important_keywords", [])
|
||||
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_keywords", [])))
|
||||
d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
|
||||
d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
|
||||
d["kb_id"] = [doc.kb_id]
|
||||
@ -432,12 +439,12 @@ def rm_chunk(tenant_id,dataset_id,document_id):
|
||||
req = request.json
|
||||
if not req.get("chunk_ids"):
|
||||
return get_error_data_result("`chunk_ids` is required")
|
||||
query = {
|
||||
"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
|
||||
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
||||
for chunk_id in req.get("chunk_ids"):
|
||||
res = ELASTICSEARCH.get(
|
||||
chunk_id, search.index_name(
|
||||
tenant_id))
|
||||
if not res.get("found"):
|
||||
return server_error_response(f"Chunk {chunk_id} not found")
|
||||
if chunk_id not in sres.ids:
|
||||
return get_error_data_result(f"Chunk {chunk_id} not found")
|
||||
if not ELASTICSEARCH.deleteByQuery(
|
||||
Q("ids", values=req["chunk_ids"]), search.index_name(tenant_id)):
|
||||
return get_error_data_result(retmsg="Index updating failure")
|
||||
@ -451,24 +458,36 @@ def rm_chunk(tenant_id,dataset_id,document_id):
|
||||
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
|
||||
@token_required
|
||||
def set(tenant_id,dataset_id,document_id,chunk_id):
|
||||
res = ELASTICSEARCH.get(
|
||||
try:
|
||||
res = ELASTICSEARCH.get(
|
||||
chunk_id, search.index_name(
|
||||
tenant_id))
|
||||
if not res.get("found"):
|
||||
return get_error_data_result(f"Chunk {chunk_id} not found")
|
||||
except Exception as e:
|
||||
return get_error_data_result(f"Can't find this chunk {chunk_id}")
|
||||
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
||||
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
|
||||
doc = DocumentService.query(id=document_id, kb_id=dataset_id)
|
||||
if not doc:
|
||||
return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
|
||||
doc = doc[0]
|
||||
query = {
|
||||
"doc_ids": [document_id], "page": 1, "size": 1024, "question": "", "sort": True
|
||||
}
|
||||
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
||||
if chunk_id not in sres.ids:
|
||||
return get_error_data_result(f"You don't own the chunk {chunk_id}")
|
||||
req = request.json
|
||||
content=res["_source"].get("content_with_weight")
|
||||
d = {
|
||||
"id": chunk_id,
|
||||
"content_with_weight": req.get("content",res.get["content_with_weight"])}
|
||||
d["content_ltks"] = rag_tokenizer.tokenize(req["content"])
|
||||
"content_with_weight": req.get("content",content)}
|
||||
d["content_ltks"] = rag_tokenizer.tokenize(d["content_with_weight"])
|
||||
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
|
||||
d["important_kwd"] = req.get("important_keywords",[])
|
||||
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
|
||||
if "important_keywords" in req:
|
||||
if type(req["important_keywords"]) != list:
|
||||
return get_error_data_result("`important_keywords` is required to be a list")
|
||||
d["important_kwd"] = req.get("important_keywords")
|
||||
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
|
||||
if "available" in req:
|
||||
d["available_int"] = req["available"]
|
||||
embd_id = DocumentService.get_embd_id(document_id)
|
||||
@ -478,7 +497,7 @@ def set(tenant_id,dataset_id,document_id,chunk_id):
|
||||
arr = [
|
||||
t for t in re.split(
|
||||
r"[\n\t]",
|
||||
req["content"]) if len(t) > 1]
|
||||
d["content_with_weight"]) if len(t) > 1]
|
||||
if len(arr) != 2:
|
||||
return get_error_data_result(
|
||||
retmsg="Q&A must be separated by TAB/ENTER key.")
|
||||
@ -486,7 +505,7 @@ def set(tenant_id,dataset_id,document_id,chunk_id):
|
||||
d = beAdoc(d, arr[0], arr[1], not any(
|
||||
[rag_tokenizer.is_chinese(t) for t in q + a]))
|
||||
|
||||
v, c = embd_mdl.encode([doc.name, req["content"]])
|
||||
v, c = embd_mdl.encode([doc.name, d["content_with_weight"]])
|
||||
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
|
||||
d["q_%d_vec" % len(v)] = v.tolist()
|
||||
ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
|
||||
@ -505,7 +524,7 @@ def retrieval_test(tenant_id):
|
||||
for id in kb_id:
|
||||
if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
|
||||
return get_error_data_result(f"You don't own the dataset {id}.")
|
||||
if "question" not in req_json:
|
||||
if "question" not in req:
|
||||
return get_error_data_result("`question` is required.")
|
||||
page = int(req.get("offset", 1))
|
||||
size = int(req.get("limit", 30))
|
||||
|
@ -24,10 +24,9 @@ from api.utils import get_uuid
|
||||
from api.utils.api_utils import get_error_data_result
|
||||
from api.utils.api_utils import get_result, token_required
|
||||
|
||||
|
||||
@manager.route('/chat/<chat_id>/session', methods=['POST'])
|
||||
@token_required
|
||||
def create(tenant_id, chat_id):
|
||||
def create(tenant_id,chat_id):
|
||||
req = request.json
|
||||
req["dialog_id"] = chat_id
|
||||
dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
|
||||
@ -51,14 +50,13 @@ def create(tenant_id, chat_id):
|
||||
del conv["reference"]
|
||||
return get_result(data=conv)
|
||||
|
||||
|
||||
@manager.route('/chat/<chat_id>/session/<session_id>', methods=['PUT'])
|
||||
@token_required
|
||||
def update(tenant_id, chat_id, session_id):
|
||||
def update(tenant_id,chat_id,session_id):
|
||||
req = request.json
|
||||
req["dialog_id"] = chat_id
|
||||
conv_id = session_id
|
||||
conv = ConversationService.query(id=conv_id, dialog_id=chat_id)
|
||||
conv = ConversationService.query(id=conv_id,dialog_id=chat_id)
|
||||
if not conv:
|
||||
return get_error_data_result(retmsg="Session does not exist")
|
||||
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
||||
@ -74,16 +72,30 @@ def update(tenant_id, chat_id, session_id):
|
||||
return get_result()
|
||||
|
||||
|
||||
@manager.route('/chat/<chat_id>/session/<session_id>/completion', methods=['POST'])
|
||||
@manager.route('/chat/<chat_id>/completion', methods=['POST'])
|
||||
@token_required
|
||||
def completion(tenant_id, chat_id, session_id):
|
||||
def completion(tenant_id,chat_id):
|
||||
req = request.json
|
||||
# req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
|
||||
# {"role": "user", "content": "上海有吗?"}
|
||||
# ]}
|
||||
if not req.get("session_id"):
|
||||
conv = {
|
||||
"id": get_uuid(),
|
||||
"dialog_id": chat_id,
|
||||
"name": req.get("name", "New session"),
|
||||
"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
||||
}
|
||||
if not conv.get("name"):
|
||||
return get_error_data_result(retmsg="Name can not be empty.")
|
||||
ConversationService.save(**conv)
|
||||
e, conv = ConversationService.get_by_id(conv["id"])
|
||||
session_id=conv.id
|
||||
else:
|
||||
session_id = req.get("session_id")
|
||||
if not req.get("question"):
|
||||
return get_error_data_result(retmsg="Please input your question.")
|
||||
conv = ConversationService.query(id=session_id, dialog_id=chat_id)
|
||||
conv = ConversationService.query(id=session_id,dialog_id=chat_id)
|
||||
if not conv:
|
||||
return get_error_data_result(retmsg="Session does not exist")
|
||||
conv = conv[0]
|
||||
@ -117,17 +129,18 @@ def completion(tenant_id, chat_id, session_id):
|
||||
conv.message[-1] = {"role": "assistant", "content": ans["answer"],
|
||||
"id": message_id, "prompt": ans.get("prompt", "")}
|
||||
ans["id"] = message_id
|
||||
ans["session_id"]=session_id
|
||||
|
||||
def stream():
|
||||
nonlocal dia, msg, req, conv
|
||||
try:
|
||||
for ans in chat(dia, msg, **req):
|
||||
fillin_conv(ans)
|
||||
yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
|
||||
yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
|
||||
ConversationService.update_by_id(conv.id, conv.to_dict())
|
||||
except Exception as e:
|
||||
yield "data:" + json.dumps({"code": 500, "message": str(e),
|
||||
"data": {"answer": "**ERROR**: " + str(e), "reference": []}},
|
||||
"data": {"answer": "**ERROR**: " + str(e),"reference": []}},
|
||||
ensure_ascii=False) + "\n\n"
|
||||
yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
|
||||
|
||||
@ -148,15 +161,14 @@ def completion(tenant_id, chat_id, session_id):
|
||||
break
|
||||
return get_result(data=answer)
|
||||
|
||||
|
||||
@manager.route('/chat/<chat_id>/session', methods=['GET'])
|
||||
@token_required
|
||||
def list(chat_id, tenant_id):
|
||||
def list(chat_id,tenant_id):
|
||||
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
||||
return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
|
||||
id = request.args.get("id")
|
||||
name = request.args.get("name")
|
||||
session = ConversationService.query(id=id, name=name, dialog_id=chat_id)
|
||||
session = ConversationService.query(id=id,name=name,dialog_id=chat_id)
|
||||
if not session:
|
||||
return get_error_data_result(retmsg="The session doesn't exist")
|
||||
page_number = int(request.args.get("page", 1))
|
||||
@ -166,7 +178,7 @@ def list(chat_id, tenant_id):
|
||||
desc = False
|
||||
else:
|
||||
desc = True
|
||||
convs = ConversationService.get_list(chat_id, page_number, items_per_page, orderby, desc, id, name)
|
||||
convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name)
|
||||
if not convs:
|
||||
return get_result(data=[])
|
||||
for conv in convs:
|
||||
@ -201,17 +213,16 @@ def list(chat_id, tenant_id):
|
||||
del conv["reference"]
|
||||
return get_result(data=convs)
|
||||
|
||||
|
||||
@manager.route('/chat/<chat_id>/session', methods=["DELETE"])
|
||||
@token_required
|
||||
def delete(tenant_id, chat_id):
|
||||
def delete(tenant_id,chat_id):
|
||||
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
||||
return get_error_data_result(retmsg="You don't own the chat")
|
||||
ids = request.json.get("ids")
|
||||
if not ids:
|
||||
return get_error_data_result(retmsg="`ids` is required in deleting operation")
|
||||
for id in ids:
|
||||
conv = ConversationService.query(id=id, dialog_id=chat_id)
|
||||
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
||||
if not conv:
|
||||
return get_error_data_result(retmsg="The chat doesn't own the session")
|
||||
ConversationService.delete_by_id(id)
|
||||
|
@ -61,14 +61,13 @@ class DocumentService(CommonService):
|
||||
docs = docs.where(
|
||||
fn.LOWER(cls.model.name).contains(keywords.lower())
|
||||
)
|
||||
count = docs.count()
|
||||
if desc:
|
||||
docs = docs.order_by(cls.model.getter_by(orderby).desc())
|
||||
else:
|
||||
docs = docs.order_by(cls.model.getter_by(orderby).asc())
|
||||
|
||||
docs = docs.paginate(page_number, items_per_page)
|
||||
|
||||
count = docs.count()
|
||||
return list(docs.dicts()), count
|
||||
|
||||
|
||||
|
api/http_api.md
@ -432,18 +432,71 @@ The error response includes a JSON object like the following:
|
||||
}
|
||||
```
|
||||
|
||||
## Delete files from a dataset
|
||||
|
||||
**DELETE** `/api/v1/dataset/{dataset_id}/document`
|
||||
|
||||
Delete files from a dataset
|
||||
|
||||
### Request
|
||||
|
||||
- Method: DELETE
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/document`
|
||||
- Headers:
|
||||
- 'Content-Type: application/json'
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
- Body:
|
||||
- `ids`:List[str]
|
||||
#### Request example
|
||||
|
||||
```bash
|
||||
curl --request DELETE \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: {YOUR ACCESS TOKEN}' \
|
||||
--data '{
|
||||
"ids": ["id_1","id_2"]
|
||||
}'
|
||||
```
|
||||
|
||||
#### Request parameters
|
||||
|
||||
- `"ids"`: (*Body parameter*)
|
||||
The IDs of the documents to be deleted.
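For reference, the same call can be issued from Python. The sketch below is illustrative only: the server address, token, and document IDs are placeholders, and it assumes the `requests` package.

```python
import requests

# Placeholders: adjust the address, token, dataset ID, and document IDs.
resp = requests.delete(
    "http://demo.ragflow.io/api/v1/dataset/{dataset_id}/document".format(
        dataset_id="b2a62730759d11ef987d0242ac120004"),
    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
    json={"ids": ["id_1", "id_2"]},
)
print(resp.json())  # {"code": 0} on success
```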
### Response
|
||||
|
||||
The successful response includes a JSON object like the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 0
|
||||
}
|
||||
```
|
||||
|
||||
- `"error_code"`: `integer`
|
||||
`0`: The operation succeeds.
|
||||
|
||||
|
||||
The error response includes a JSON object like the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "You do not own the dataset 7898da028a0511efbf750242ac1220005."
|
||||
}
|
||||
```
|
||||
|
||||
## Download a file from a dataset
|
||||
|
||||
**GET** `/api/v1/dataset/{dataset_id}/document/{document_id}`
|
||||
|
||||
Downloads files from a dataset.
|
||||
Downloads a file from a dataset.
|
||||
|
||||
### Request
|
||||
|
||||
- Method: GET
|
||||
- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}`
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
- Output:
|
||||
- '{FILE_NAME}'
|
||||
@ -451,10 +504,9 @@ Downloads files from a dataset.
|
||||
|
||||
```bash
|
||||
curl --request GET \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{documents_id} \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--output '{FILE_NAME}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id} \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
||||
--output ./ragflow.txt
|
||||
```
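A hedged Python equivalent of the download call might look like this; the address, token, and IDs are placeholders, and the output file name is arbitrary.

```python
import requests

# Placeholders: adjust the address, token, dataset ID, and document ID.
url = ("http://demo.ragflow.io/api/v1/dataset/{dataset_id}"
       "/document/{document_id}").format(
    dataset_id="77d9d24e759a11ef880c0242ac120004",
    document_id="77df9ef4759a11ef8bdd0242ac120004")

resp = requests.get(url, headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"})
with open("ragflow.txt", "wb") as fh:   # any local file name works
    fh.write(resp.content)              # raw bytes of the stored document
```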
#### Request parameters
|
||||
@ -466,7 +518,7 @@ curl --request GET \
|
||||
|
||||
### Response
|
||||
|
||||
The successful response includes a JSON object like the following:
|
||||
The successful response includes a text object like the following:
|
||||
|
||||
```text
|
||||
test_2.
|
||||
@ -596,92 +648,39 @@ Update a file in a dataset
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
|
||||
- Body:
|
||||
- `name`:`string`
|
||||
- `parser_method`:`string`
|
||||
- `parser_config`:`dict`
|
||||
#### Request example
|
||||
|
||||
```bash
|
||||
curl --request PUT \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id} \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/info/{document_id} \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS TOKEN}' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"name": "manual.txt",
|
||||
"thumbnail": null,
|
||||
"knowledgebase_id": "779333c0758611ef910f0242ac120004",
|
||||
"parser_method": "manual",
|
||||
"parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。;!?", "layout_recognize": true, "task_page_size": 12},
|
||||
"source_type": "local", "type": "doc",
|
||||
"created_by": "134408906b6811efbcd20242ac120005",
|
||||
"size": 0, "token_count": 0, "chunk_count": 0,
|
||||
"progress": 0.0,
|
||||
"progress_msg": "",
|
||||
"process_begin_at": null,
|
||||
"process_duration": 0.0
|
||||
"parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。;!?", "layout_recognize": true, "task_page_size": 12}
|
||||
}'
|
||||
|
||||
```
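The same update can be sent from Python. This is a sketch that assumes the `/info/{document_id}` path shown in the example above; the address, token, and IDs are placeholders.

```python
import requests

# Placeholders: adjust the address, token, dataset ID, and document ID.
url = ("http://demo.ragflow.io/api/v1/dataset/{dataset_id}"
       "/info/{document_id}").format(
    dataset_id="779333c0758611ef910f0242ac120004",
    document_id="f6b170ac758811efa0660242ac120004")

payload = {
    "name": "manual.txt",
    "parser_method": "manual",
    "parser_config": {"chunk_token_count": 128, "delimiter": "\n!?。;!?",
                      "layout_recognize": True, "task_page_size": 12},
}
resp = requests.put(url,
                    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
                    json=payload)
print(resp.json())  # {"code": 0} on success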
#### Request parameters
|
||||
|
||||
- `"thumbnail"`: (*Body parameter*)
|
||||
Thumbnail image of the document.
|
||||
- `""`
|
||||
|
||||
- `"knowledgebase_id"`: (*Body parameter*)
|
||||
Knowledge base ID related to the document.
|
||||
- `""`
|
||||
|
||||
- `"parser_method"`: (*Body parameter*)
|
||||
Method used to parse the document.
|
||||
- `""`
|
||||
|
||||
|
||||
- `"parser_config"`: (*Body parameter*)
|
||||
Configuration object for the parser.
|
||||
- If the value is `None`, a dictionary with default values will be generated.
|
||||
|
||||
- `"source_type"`: (*Body parameter*)
|
||||
Source type of the document.
|
||||
- `""`
|
||||
|
||||
- `"type"`: (*Body parameter*)
|
||||
Type or category of the document.
|
||||
- `""`
|
||||
|
||||
- `"created_by"`: (*Body parameter*)
|
||||
Creator of the document.
|
||||
- `""`
|
||||
|
||||
- `"name"`: (*Body parameter*)
|
||||
Name or title of the document.
|
||||
- `""`
|
||||
|
||||
- `"size"`: (*Body parameter*)
|
||||
Size of the document in bytes or some other unit.
|
||||
- `0`
|
||||
|
||||
- `"token_count"`: (*Body parameter*)
|
||||
Number of tokens in the document.
|
||||
- `0`
|
||||
|
||||
- `"chunk_count"`: (*Body parameter*)
|
||||
Number of chunks the document is split into.
|
||||
- `0`
|
||||
|
||||
- `"progress"`: (*Body parameter*)
|
||||
Current processing progress as a percentage.
|
||||
- `0.0`
|
||||
|
||||
- `"progress_msg"`: (*Body parameter*)
|
||||
Message indicating current progress status.
|
||||
- `""`
|
||||
|
||||
- `"process_begin_at"`: (*Body parameter*)
|
||||
Start time of the document processing.
|
||||
- `None`
|
||||
|
||||
- `"process_duration"`: (*Body parameter*)
|
||||
Duration of the processing in seconds or minutes.
|
||||
- `0.0`
|
||||
|
||||
|
||||
### Response
|
||||
@ -712,34 +711,34 @@ Parse files into chunks in a dataset
|
||||
### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `/api/v1/dataset/{dataset_id}/chunk`
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/chunk `
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
- Body:
|
||||
- `document_ids`:List[str]
|
||||
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--raw '{
|
||||
"documents": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
|
||||
}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
||||
--data '{"document_ids": ["97a5f1c2759811efaa500242ac120004","97ad64b6759811ef9fc30242ac120004"]}'
|
||||
```
|
||||
|
||||
#### Request parameters
|
||||
|
||||
- `"dataset_id"`: (*Path parameter*)
|
||||
- `"documents"`: (*Body parameter*)
|
||||
- Documents to parse
|
||||
- `"document_ids"`:(*Body parameter*)
|
||||
The IDs of the documents to be parsed.
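As a quick illustration, the parse request can also be triggered from Python; the server address, token, and IDs below are placeholders.

```python
import requests

# Placeholders: adjust the address, token, dataset ID, and document IDs.
resp = requests.post(
    "http://demo.ragflow.io/api/v1/dataset/{dataset_id}/chunk".format(
        dataset_id="b2a62730759d11ef987d0242ac120004"),
    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
    json={"document_ids": ["97a5f1c2759811efaa500242ac120004"]},
)
print(resp.json())  # {"code": 0} when parsing has been queued
```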
### Response
|
||||
|
||||
The successful response includes a JSON object like the following:
|
||||
|
||||
```shell
|
||||
```json
|
||||
{
|
||||
"code": 0
|
||||
}
|
||||
@ -747,10 +746,10 @@ The successful response includes a JSON object like the following:
|
||||
|
||||
The error response includes a JSON object like the following:
|
||||
|
||||
```shell
|
||||
```json
|
||||
{
|
||||
"code": 3016,
|
||||
"message": "Can't connect database"
|
||||
"code": 102,
|
||||
"message": "`document_ids` is required"
|
||||
}
|
||||
```
|
||||
|
||||
@ -762,35 +761,35 @@ Stop file parsing
|
||||
|
||||
### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `/api/v1/dataset/{dataset_id}/chunk`
|
||||
- Method: DELETE
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/chunk`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
|
||||
- Body:
|
||||
- `document_ids`:List[str]
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
```bash
|
||||
curl --request DELETE \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--raw '{
|
||||
"documents": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
|
||||
}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
||||
--data '{"document_ids": ["97a5f1c2759811efaa500242ac120004","97ad64b6759811ef9fc30242ac120004"]}'
|
||||
```
|
||||
|
||||
#### Request parameters
|
||||
|
||||
- `"dataset_id"`: (*Path parameter*)
|
||||
- `"documents"`: (*Body parameter*)
|
||||
- Documents to stop parsing
|
||||
- `"document_ids"`:(*Body parameter*)
|
||||
The IDs of the documents for which parsing is to be stopped.
|
||||
|
||||
|
||||
### Response
|
||||
|
||||
The successful response includes a JSON object like the following:
|
||||
|
||||
```shell
|
||||
```json
|
||||
{
|
||||
"code": 0
|
||||
}
|
||||
@ -798,104 +797,98 @@ The successful response includes a JSON object like the following:
|
||||
|
||||
The error response includes a JSON object like the following:
|
||||
|
||||
```shell
|
||||
```json
|
||||
{
|
||||
"code": 3016,
|
||||
"message": "Can't connect database"
|
||||
"code": 102,
|
||||
"message": "`document_ids` is required"
|
||||
}
|
||||
```
|
||||
|
||||
## Get document chunk list
|
||||
|
||||
**GET** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
**GET** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
|
||||
|
||||
Get document chunk list
|
||||
|
||||
### Request
|
||||
|
||||
- Method: GET
|
||||
- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id}`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
```bash
|
||||
curl --request GET \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk?keywords={keywords}&offset={offset}&limit={limit}&id={id} \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
```
|
||||
|
||||
#### Request parameters
|
||||
|
||||
- `"dataset_id"`: (*Path parameter*)
|
||||
- `"document_id"`: (*Path parameter*)
|
||||
|
||||
- `"offset"`(*Filter parameter*)
|
||||
The beginning number of records for paging.
|
||||
- `"keywords"`(*Filter parameter*)
|
||||
List chunks whose name has the given keywords
|
||||
- `"limit"`(*Filter parameter*)
|
||||
Records number to return
|
||||
- `"id"`(*Filter parameter*)
|
||||
The id of chunk to be got
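A Python sketch of this query, with the filters passed as query parameters (placeholder address, token, and IDs):

```python
import requests

# Placeholders: adjust the address, token, dataset ID, and document ID.
url = ("http://demo.ragflow.io/api/v1/dataset/{dataset_id}"
       "/document/{document_id}/chunk").format(
    dataset_id="77d9d24e759a11ef880c0242ac120004",
    document_id="77df9ef4759a11ef8bdd0242ac120004")

resp = requests.get(url,
                    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
                    params={"keywords": "ragflow", "offset": 1, "limit": 30})
data = resp.json()["data"]
print(data["total"])                      # number of matching chunks
print([c["id"] for c in data["chunks"]])  # chunk IDs on this page
```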
### Response
|
||||
|
||||
The successful response includes a JSON object like the following:
|
||||
|
||||
```shell
|
||||
```json
|
||||
{
|
||||
"code": 0
|
||||
"code": 0,
|
||||
"data": {
|
||||
"chunks": [
|
||||
{
|
||||
"available_int": 1,
|
||||
"content": "<em>advantag</em>of ragflow increas accuraci and relev:by incorpor retriev inform , ragflow can gener respons that are more accur",
|
||||
"document_keyword": "ragflow_test.txt",
|
||||
"document_id": "77df9ef4759a11ef8bdd0242ac120004",
|
||||
"id": "4ab8c77cfac1a829c8d5ed022a0808c0",
|
||||
"image_id": "",
|
||||
"important_keywords": [],
|
||||
"positions": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"chunks": [],
|
||||
"doc": {
|
||||
"chunk_count": 5,
|
||||
"create_date": "Wed, 18 Sep 2024 08:46:16 GMT",
|
||||
"create_time": 1726649176833,
|
||||
"created_by": "134408906b6811efbcd20242ac120005",
|
||||
"id": "77df9ef4759a11ef8bdd0242ac120004",
|
||||
"knowledgebase_id": "77d9d24e759a11ef880c0242ac120004",
|
||||
"location": "ragflow_test.txt",
|
||||
"name": "ragflow_test.txt",
|
||||
"chunk_num": 0,
|
||||
"create_date": "Sun, 29 Sep 2024 03:47:29 GMT",
|
||||
"create_time": 1727581649216,
|
||||
"created_by": "69736c5e723611efb51b0242ac120007",
|
||||
"id": "8cb781ec7e1511ef98ac0242ac120006",
|
||||
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
||||
"location": "明天的天气是晴天.txt",
|
||||
"name": "明天的天气是晴天.txt",
|
||||
"parser_config": {
|
||||
"chunk_token_count": 128,
|
||||
"delimiter": "\n!?。;!?",
|
||||
"layout_recognize": true,
|
||||
"task_page_size": 12
|
||||
"pages": [
|
||||
[
|
||||
1,
|
||||
1000000
|
||||
]
|
||||
]
|
||||
},
|
||||
"parser_method": "naive",
|
||||
"process_begin_at": "Wed, 18 Sep 2024 08:46:16 GMT",
|
||||
"process_duation": 7.3213,
|
||||
"progress": 1.0,
|
||||
"progress_msg": "\nTask has been received.\nStart to parse.\nFinish parsing.\nFinished slicing files(5). Start to embedding the content.\nFinished embedding(6.16)! Start to build index!\nDone!",
|
||||
"run": "3",
|
||||
"size": 4209,
|
||||
"parser_id": "naive",
|
||||
"process_begin_at": "Tue, 15 Oct 2024 10:23:51 GMT",
|
||||
"process_duation": 1435.37,
|
||||
"progress": 0.0370833,
|
||||
"progress_msg": "\nTask has been received.",
|
||||
"run": "1",
|
||||
"size": 24,
|
||||
"source_type": "local",
|
||||
"status": "1",
|
||||
"thumbnail": null,
|
||||
"token_count": 746,
|
||||
"token_num": 0,
|
||||
"type": "doc",
|
||||
"update_date": "Wed, 18 Sep 2024 08:46:23 GMT",
|
||||
"update_time": 1726649183321
|
||||
"update_date": "Tue, 15 Oct 2024 10:47:46 GMT",
|
||||
"update_time": 1728989266371
|
||||
},
|
||||
"total": 1
|
||||
},
|
||||
"total": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The error response includes a JSON object like the following:
|
||||
|
||||
```shell
|
||||
```json
|
||||
{
|
||||
"code": 3016,
|
||||
"message": "Can't connect database"
|
||||
"code": 102,
|
||||
"message": "You don't own the document 5c5999ec7be811ef9cab0242ac12000e5."
|
||||
}
|
||||
```
|
||||
|
||||
@ -908,55 +901,96 @@ Delete document chunks
|
||||
### Request
|
||||
|
||||
- Method: DELETE
|
||||
- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
- Body:
|
||||
- `chunk_ids`:List[str]
|
||||
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
```bash
|
||||
curl --request DELETE \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--raw '{
|
||||
"chunks": ["f6b170ac758811efa0660242ac120004", "97ad64b6759811ef9fc30242ac120004"]
|
||||
}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
||||
--data '{
|
||||
"chunk_ids": ["test_1", "test_2"]
|
||||
}'
|
||||
```
|
||||
#### Request parameters
|
||||
|
||||
- `"chunk_ids"`:(*Body parameter*)
|
||||
The IDs of the chunks to be deleted.
|
||||
|
||||
### Response
|
||||
Success
|
||||
```json
|
||||
{
|
||||
"code": 0
|
||||
}
|
||||
```
|
||||
Error
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "`chunk_ids` is required"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Update document chunk
|
||||
|
||||
**PUT** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
**PUT** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}`
|
||||
|
||||
Update document chunk
|
||||
|
||||
### Request
|
||||
|
||||
- Method: PUT
|
||||
- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
|
||||
- Body:
|
||||
- `content`:str
|
||||
- `important_keywords`: `List[str]`
|
||||
- `available`:int
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
```bash
|
||||
curl --request PUT \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--raw '{
|
||||
"chunk_id": "d87fb0b7212c15c18d0831677552d7de",
|
||||
"knowledgebase_id": null,
|
||||
"name": "",
|
||||
"content": "ragflow123",
|
||||
"important_keywords": [],
|
||||
"document_id": "e6bbba92759511efaa900242ac120004",
|
||||
"status": "1"
|
||||
}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id} \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: {YOUR_ACCESS_TOKEN}' \
|
||||
--data '{
|
||||
"content": "ragflow123",
|
||||
"important_keywords": [],
|
||||
}'
|
||||
```
|
||||
#### Request parameters
|
||||
- `"content"`:(*Body parameter*)
|
||||
Contains the main text or information of the chunk.
|
||||
- `"important_keywords"`:(*Body parameter*)
|
||||
Lists the key terms or phrases that are significant or central to the chunk's content.
|
||||
- `"available"`:(*Body parameter*)
|
||||
Indicates the availability status: `0` means unavailable and `1` means available.
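For example, toggling a chunk's availability from Python could look like the sketch below (placeholder address, token, and IDs):

```python
import requests

# Placeholders: adjust the address, token, and the three IDs.
url = ("http://demo.ragflow.io/api/v1/dataset/{dataset_id}"
       "/document/{document_id}/chunk/{chunk_id}").format(
    dataset_id="c7ee74067a2c11efb21c0242ac120006",
    document_id="5c5999ec7be811ef9cab0242ac120005",
    chunk_id="d78435d142bd5cf6704da62c778795c5")

# Mark the chunk unavailable without touching its content or keywords.
resp = requests.put(url,
                    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
                    json={"available": 0})
print(resp.json())  # {"code": 0} on success
```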
### Response
|
||||
Success
|
||||
```json
|
||||
{
|
||||
"code": 0
|
||||
}
|
||||
```
|
||||
Error
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "Can't find this chunk 29a2d9987e16ba331fb4d7d30d99b71d2"
|
||||
}
|
||||
```
|
||||
## Insert document chunks
|
||||
|
||||
**POST** `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
@ -966,50 +1000,187 @@ Insert document chunks
|
||||
### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
- URL: `http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
|
||||
- Body:
|
||||
- `content`: str
|
||||
- `important_keywords`:List[str]
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--raw '{
|
||||
"document_id": "97ad64b6759811ef9fc30242ac120004",
|
||||
"content": ["ragflow content", "ragflow content"]
|
||||
}'
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/document/{document_id}/chunk \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
||||
--data '{
|
||||
"content": "ragflow content"
|
||||
}'
|
||||
```
|
||||
#### Request parameters
|
||||
- `content`:(*Body parameter*)
|
||||
Contains the main text or information of the chunk.
|
||||
- `important_keywords`(*Body parameter*)
|
||||
Lists the key terms or phrases that are significant or central to the chunk's content.
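A minimal Python sketch of inserting a chunk and reading back the ID the server assigns (placeholder address, token, and IDs):

```python
import requests

# Placeholders: adjust the address, token, dataset ID, and document ID.
url = ("http://demo.ragflow.io/api/v1/dataset/{dataset_id}"
       "/document/{document_id}/chunk").format(
    dataset_id="c7ee74067a2c11efb21c0242ac120006",
    document_id="5c5999ec7be811ef9cab0242ac120005")

resp = requests.post(url,
                     headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
                     json={"content": "ragflow content",
                           "important_keywords": ["ragflow"]})
chunk = resp.json()["data"]["chunk"]
print(chunk["id"], chunk["content"])  # the ID is assigned by the server
```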
### Response
|
||||
Success
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"data": {
|
||||
"chunk": {
|
||||
"content": "ragflow content",
|
||||
"create_time": "2024-10-16 08:05:04",
|
||||
"create_timestamp": 1729065904.581025,
|
||||
"dataset_id": [
|
||||
"c7ee74067a2c11efb21c0242ac120006"
|
||||
],
|
||||
"document_id": "5c5999ec7be811ef9cab0242ac120005",
|
||||
"id": "d78435d142bd5cf6704da62c778795c5",
|
||||
"important_keywords": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Error
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "`content` is required"
|
||||
}
|
||||
```
|
||||
## Dataset retrieval test
|
||||
|
||||
**GET** `/api/v1/dataset/{dataset_id}/retrieval`
|
||||
**GET** `/api/v1/retrieval`
|
||||
|
||||
Retrieval test of a dataset
|
||||
|
||||
### Request
|
||||
|
||||
- Method: GET
|
||||
- URL: `/api/v1/dataset/{dataset_id}/retrieval`
|
||||
- Method: POST
|
||||
- URL: `http://{address}/api/v1/retrieval`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
|
||||
- Body:
|
||||
- `question`: str
|
||||
- `datasets`: List[str]
|
||||
- `documents`: List[str]
|
||||
- `offset`: int
|
||||
- `limit`: int
|
||||
- `similarity_threshold`: float
|
||||
- `vector_similarity_weight`: float
|
||||
- `top_k`: int
|
||||
- `rerank_id`: string
|
||||
- `keyword`: bool
|
||||
- `highlight`: bool
|
||||
#### Request example
|
||||
|
||||
```shell
|
||||
curl --request GET \
|
||||
--url http://{address}/api/v1/dataset/{dataset_id}/retrieval \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
--raw '{
|
||||
"query_text": "This is a cat."
|
||||
}'
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address}/api/v1/retrieval \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: {YOUR_ACCESS_TOKEN}' \
|
||||
--data '{
|
||||
"question": "What is advantage of ragflow?",
|
||||
"datasets": [
|
||||
"b2a62730759d11ef987d0242ac120004"
|
||||
],
|
||||
"documents": [
|
||||
"77df9ef4759a11ef8bdd0242ac120004"
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
#### Request parameter
|
||||
- `"question"`: (*Body parameter*)
|
||||
User's question, search keywords
|
||||
`""`
|
||||
- `"datasets"`: (*Body parameter*)
|
||||
The scope of datasets
|
||||
`None`
|
||||
- `"documents"`: (*Body parameter*)
|
||||
The scope of document. `None` means no limitation
|
||||
`None`
|
||||
- `"offset"`: (*Body parameter*)
|
||||
The beginning point of retrieved records
|
||||
`1`
|
||||
|
||||
- `"limit"`: (*Body parameter*)
|
||||
The maximum number of records needed to return
|
||||
`30`
|
||||
|
||||
- `"similarity_threshold"`: (*Body parameter*)
|
||||
The minimum similarity score
|
||||
`0.2`
|
||||
|
||||
- `"vector_similarity_weight"`: (*Body parameter*)
|
||||
The weight of vector cosine similarity, `1 - x` is the term similarity weight
|
||||
`0.3`
|
||||
|
||||
- `"top_k"`: (*Body parameter*)
|
||||
Number of records engaged in vector cosine computation
|
||||
`1024`
|
||||
|
||||
- `"rerank_id"`: (*Body parameter*)
|
||||
ID of the rerank model
|
||||
`None`
|
||||
|
||||
- `"keyword"`: (*Body parameter*)
|
||||
Whether keyword-based matching is enabled
|
||||
`False`
|
||||
|
||||
- `"highlight"`: (*Body parameter*)
|
||||
Whether to enable highlighting of matched terms in the results
|
||||
`False`
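A Python sketch of a retrieval test that also sets the tuning parameters (placeholder address, token, and dataset ID):

```python
import requests

# Placeholders: adjust the address, token, and dataset ID.
resp = requests.post(
    "http://demo.ragflow.io/api/v1/retrieval",
    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
    json={
        "question": "What is advantage of ragflow?",
        "datasets": ["b2a62730759d11ef987d0242ac120004"],
        "similarity_threshold": 0.2,      # drop weak matches
        "vector_similarity_weight": 0.3,  # the remaining 0.7 is term similarity
        "top_k": 1024,
        "highlight": True,
    },
)
for chunk in resp.json()["data"]["chunks"]:
    print(round(chunk["similarity"], 3), chunk["content"][:80])
```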
### Response
|
||||
Success
|
||||
```json
|
||||
{
|
||||
"code": 0,
|
||||
"data": {
|
||||
"chunks": [
|
||||
{
|
||||
"content": "ragflow content",
|
||||
"content_ltks": "ragflow content",
|
||||
"document_id": "5c5999ec7be811ef9cab0242ac120005",
|
||||
"document_keyword": "1.txt",
|
||||
"highlight": "<em>ragflow</em> content",
|
||||
"id": "d78435d142bd5cf6704da62c778795c5",
|
||||
"img_id": "",
|
||||
"important_keywords": [
|
||||
""
|
||||
],
|
||||
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
||||
"positions": [
|
||||
""
|
||||
],
|
||||
"similarity": 0.9669436601210759,
|
||||
"term_similarity": 1.0,
|
||||
"vector_similarity": 0.8898122004035864
|
||||
}
|
||||
],
|
||||
"doc_aggs": [
|
||||
{
|
||||
"count": 1,
|
||||
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
||||
"doc_name": "1.txt"
|
||||
}
|
||||
],
|
||||
"total": 1
|
||||
}
|
||||
}
|
||||
```
|
||||
Error
|
||||
```json
|
||||
{
|
||||
"code": 102,
|
||||
"message": "`datasets` is required."
|
||||
}
|
||||
```
|
||||
## Create chat
|
||||
|
||||
**POST** `/api/v1/chat`
|
||||
@ -1708,26 +1879,27 @@ Error
|
||||
|
||||
## Chat with a chat session
|
||||
|
||||
**POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
|
||||
**POST** `/api/v1/chat/{chat_id}/completion`
|
||||
|
||||
Chat with a chat session
|
||||
|
||||
### Request
|
||||
|
||||
- Method: POST
|
||||
- URL: `http://{address} /api/v1/chat/{chat_id}/session/{session_id}/completion`
|
||||
- URL: `http://{address}/api/v1/chat/{chat_id}/completion`
|
||||
- Headers:
|
||||
- `content-Type: application/json`
|
||||
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
||||
- Body:
|
||||
- `question`: string
|
||||
- `stream`: bool
|
||||
- `session_id`: str
|
||||
|
||||
|
||||
#### Request example
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url http://{address} /api/v1/chat/{chat_id}/session/{session_id}/completion \
|
||||
--url http://{address}/api/v1/chat/{chat_id}/completion \
|
||||
--header 'Content-Type: application/json' \
|
||||
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
||||
--data-binary '{
|
||||
@ -1743,6 +1915,8 @@ curl --request POST \
|
||||
- `stream`: (*Body Parameter*)
|
||||
Whether to stream the generated text.
|
||||
`False`
|
||||
- `session_id`: (*Body Parameter*)
|
||||
The ID of the session. If not provided, a new session will be generated.
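When `stream` is true, the endpoint emits server-sent `data: {...}` lines, as the `stream()` generator in this PR shows. A hedged client-side sketch (placeholder address, token, and chat ID):

```python
import json
import requests

# Placeholders: adjust the address, token, and chat ID.
resp = requests.post(
    "http://demo.ragflow.io/api/v1/chat/{chat_id}/completion".format(
        chat_id="b2a62730759d11ef987d0242ac120004"),
    headers={"Authorization": "Bearer YOUR_ACCESS_TOKEN"},
    json={"question": "What is RAGFlow?", "stream": True},
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    if not line or not line.startswith("data:"):
        continue                              # skip keep-alive blank lines
    event = json.loads(line[len("data:"):])
    if event.get("data") is True:             # final sentinel event
        break
    print(event["data"]["answer"])            # partial answer so far
```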
### Response
|
||||
Success
|
||||
```json
|
||||
|
@ -244,42 +244,117 @@ File management inside knowledge base
|
||||
## Upload document
|
||||
|
||||
```python
|
||||
RAGFLOW.upload_document(ds:DataSet, name:str, blob:bytes)-> bool
|
||||
DataSet.upload_documents(document_list: List[dict])
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### name
|
||||
|
||||
#### blob
|
||||
|
||||
#### document_list:`List[dict]`
|
||||
A list composed of dicts containing `name` and `blob`.
|
||||
|
||||
|
||||
### Returns
|
||||
no return
|
||||
|
||||
### Examples
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
ds = rag.create_dataset(name="kb_1")
|
||||
ds.upload_documents([{"name": "1.txt", "blob": "123"}])
|
||||
```
|
||||
---
|
||||
|
||||
## Update document
|
||||
|
||||
```python
|
||||
Document.update(update_message:dict)
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### update_message:`dict`
|
||||
Only `name`, `parser_config`, and `parser_method` can be changed.
|
||||
|
||||
### Returns
|
||||
|
||||
no return
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
ds=rag.list_datasets(id='id')
|
||||
ds=ds[0]
|
||||
doc = ds.list_documents(id="wdfxb5t547d")
|
||||
doc = doc[0]
|
||||
doc.update({"parser_method": "manual"})
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Retrieve document
|
||||
## Download document
|
||||
|
||||
```python
|
||||
RAGFlow.get_document(id:str=None,name:str=None) -> Document
|
||||
Document.download() -> bytes
|
||||
```
|
||||
|
||||
### Returns
|
||||
|
||||
bytes of the document.
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
ds=rag.list_datasets(id="id")
|
||||
ds=ds[0]
|
||||
doc = ds.list_documents(id="wdfxb5t547d")
|
||||
doc = doc[0]
|
||||
open("~/ragflow.txt", "wb+").write(doc.download())
|
||||
print(doc)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List documents
|
||||
|
||||
```python
|
||||
Dataset.list_documents(id:str =None, keywords: str=None, offset: int=0, limit:int = 1024,order_by:str = "create_time", desc: bool = True) -> List[Document]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### id: `str`, *Required*
|
||||
#### id: `str`
|
||||
|
||||
ID of the document to retrieve.
|
||||
The ID of the document to retrieve.
|
||||
|
||||
#### name: `str`
|
||||
#### keywords: `str`
|
||||
|
||||
Name or title of the document.
|
||||
List documents whose name has the given keywords. Defaults to `None`.
|
||||
|
||||
#### offset: `int`
|
||||
|
||||
The beginning number of records for paging. Defaults to `0`.
|
||||
|
||||
#### limit: `int`
|
||||
|
||||
The number of records to return; `-1` means all of them.
|
||||
|
||||
#### orderby: `str`
|
||||
The field by which the records should be sorted. This specifies the attribute or column used to order the results.
|
||||
|
||||
#### desc:`bool`
|
||||
A boolean flag indicating whether the sorting should be in descending order.
|
||||
### Returns
|
||||
|
||||
List[Document]
|
||||
|
||||
A document object containing the following attributes:
|
||||
|
||||
#### id: `str`
|
||||
@ -352,98 +427,14 @@ Duration of the processing in seconds or minutes. Defaults to `0.0`.
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
doc = rag.get_document(id="wdfxb5t547d",name='testdocument.txt')
|
||||
print(doc)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Save document settings
|
||||
|
||||
```python
|
||||
Document.save() -> bool
|
||||
```
|
||||
|
||||
### Returns
|
||||
|
||||
bool
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
doc = rag.get_document(id="wdfxb5t547d")
|
||||
doc.parser_method= "manual"
|
||||
doc.save()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Download document
|
||||
|
||||
```python
|
||||
Document.download() -> bytes
|
||||
```
|
||||
|
||||
### Returns
|
||||
|
||||
bytes of the document.
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
doc = rag.get_document(id="wdfxb5t547d")
|
||||
open("~/ragflow.txt", "w+").write(doc.download())
|
||||
print(doc)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## List documents
|
||||
|
||||
```python
|
||||
Dataset.list_docs(keywords: str=None, offset: int=0, limit:int = -1) -> List[Document]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### keywords: `str`
|
||||
|
||||
List documents whose name has the given keywords. Defaults to `None`.
|
||||
|
||||
#### offset: `int`
|
||||
|
||||
The beginning number of records for paging. Defaults to `0`.
|
||||
|
||||
#### limit: `int`
|
||||
|
||||
Records number to return, -1 means all of them. Records number to return, -1 means all of them.
|
||||
|
||||
### Returns
|
||||
|
||||
List[Document]
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
ds = rag.create_dataset(name="kb_1")
|
||||
|
||||
filename1 = "~/ragflow.txt"
|
||||
rag.create_document(ds, name=filename1 , blob=open(filename1 , "rb").read())
|
||||
|
||||
filename2 = "~/infinity.txt"
|
||||
rag.create_document(ds, name=filename2 , blob=open(filename2 , "rb").read())
|
||||
|
||||
for d in ds.list_docs(keywords="rag", offset=0, limit=12):
|
||||
blob=open(filename1 , "rb").read()
|
||||
list_files=[{"name":filename1,"blob":blob}]
|
||||
ds.upload_documents(list_files)
|
||||
for d in ds.list_documents(keywords="rag", offset=0, limit=12):
|
||||
print(d)
|
||||
```
|
||||
|
||||
@ -452,12 +443,11 @@ for d in ds.list_docs(keywords="rag", offset=0, limit=12):
|
||||
## Delete documents
|
||||
|
||||
```python
|
||||
Document.delete() -> bool
|
||||
DataSet.delete_documents(ids: List[str] = None)
|
||||
```
|
||||
### Returns
|
||||
|
||||
bool
|
||||
description: delete success or not
|
||||
no return
|
||||
|
||||
### Examples
|
||||
|
||||
@ -465,119 +455,87 @@ description: delete success or not
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
ds = rag.create_dataset(name="kb_1")
|
||||
|
||||
filename1 = "~/ragflow.txt"
|
||||
rag.create_document(ds, name=filename1 , blob=open(filename1 , "rb").read())
|
||||
|
||||
filename2 = "~/infinity.txt"
|
||||
rag.create_document(ds, name=filename2 , blob=open(filename2 , "rb").read())
|
||||
for d in ds.list_docs(keywords="rag", offset=0, limit=12):
|
||||
d.delete()
|
||||
ds = rag.list_datasets(name="kb_1")
|
||||
ds = ds[0]
|
||||
ds.delete_documents(ids=["id_1","id_2"])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Parse document
|
||||
## Parse and stop parsing document
|
||||
|
||||
```python
|
||||
Document.async_parse() -> None
|
||||
RAGFLOW.async_parse_documents() -> None
|
||||
DataSet.async_parse_documents(document_ids:List[str]) -> None
|
||||
DataSet.async_cancel_parse_documents(document_ids:List[str])-> None
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### document_ids:`List[str]`
|
||||
The ids of the documents to be parsed
|
||||
????????????????????????????????????????????????????
|
||||
|
||||
### Returns
|
||||
|
||||
no return
|
||||
????????????????????????????????????????????????????
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
#document parse and cancel
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.create_dataset(name="dataset_name")
|
||||
name3 = 'ai.pdf'
|
||||
path = 'test_data/ai.pdf'
|
||||
rag.create_document(ds, name=name3, blob=open(path, "rb").read())
|
||||
doc = rag.get_document(name="ai.pdf")
|
||||
doc.async_parse()
|
||||
print("Async parsing initiated")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cancel document parsing
|
||||
|
||||
```python
|
||||
rag.async_cancel_parse_documents(ids)
|
||||
RAGFLOW.async_cancel_parse_documents()-> None
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### ids, `list[]`
|
||||
|
||||
### Returns
|
||||
|
||||
?????????????????????????????????????????????????
|
||||
|
||||
### Examples
|
||||
|
||||
```python
|
||||
#documents parse and cancel
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.create_dataset(name="God5")
|
||||
documents = [
|
||||
{'name': 'test1.txt', 'path': 'test_data/test1.txt'},
|
||||
{'name': 'test2.txt', 'path': 'test_data/test2.txt'},
|
||||
{'name': 'test3.txt', 'path': 'test_data/test3.txt'}
|
||||
{'name': 'test1.txt', 'blob': open('./test_data/test1.txt',"rb").read()},
|
||||
{'name': 'test2.txt', 'blob': open('./test_data/test2.txt',"rb").read()},
|
||||
{'name': 'test3.txt', 'blob': open('./test_data/test3.txt',"rb").read()}
|
||||
]
|
||||
|
||||
# Create documents in bulk
|
||||
for doc_info in documents:
|
||||
with open(doc_info['path'], "rb") as file:
|
||||
created_doc = rag.create_document(ds, name=doc_info['name'], blob=file.read())
|
||||
docs = [rag.get_document(name=doc_info['name']) for doc_info in documents]
|
||||
ids = [doc.id for doc in docs]
|
||||
|
||||
rag.async_parse_documents(ids)
|
||||
ds.upload_documents(documents)
|
||||
documents=ds.list_documents(keywords="test")
|
||||
ids=[]
|
||||
for document in documents:
|
||||
ids.append(document.id)
|
||||
ds.async_parse_documents(ids)
|
||||
print("Async bulk parsing initiated")
|
||||
|
||||
for doc in docs:
|
||||
for progress, msg in doc.join(interval=5, timeout=10):
|
||||
print(f"{doc.name}: Progress: {progress}, Message: {msg}")
|
||||
|
||||
cancel_result = rag.async_cancel_parse_documents(ids)
|
||||
ds.async_cancel_parse_documents(ids)
|
||||
print("Async bulk parsing cancelled")
|
||||
```
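Pieced together, the updated parse/cancel flow reads roughly as follows. This is a sketch, not the canonical example: the dataset name, file paths, and keyword filter are placeholders.

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
ds = rag.create_dataset(name="parse_demo")   # placeholder dataset name

# Upload a couple of local files (paths are placeholders), then parse them.
docs = [{"name": "test1.txt", "blob": open("./test_data/test1.txt", "rb").read()},
        {"name": "test2.txt", "blob": open("./test_data/test2.txt", "rb").read()}]
ds.upload_documents(docs)

ids = [d.id for d in ds.list_documents(keywords="test")]
ds.async_parse_documents(ids)           # start asynchronous parsing
ds.async_cancel_parse_documents(ids)    # ...and cancel it again
```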
---
|
||||
|
||||
## Join document
|
||||
|
||||
??????????????????
|
||||
|
||||
## List chunks
|
||||
```python
|
||||
Document.join(interval=15, timeout=3600) -> iteral[Tuple[float, str]]
|
||||
Document.list_chunks(keywords: str = None, offset: int = 0, limit: int = -1, id : str = None) -> List[Chunk]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
#### interval: `int`
|
||||
- `keywords`: `str`
|
||||
List chunks whose name has the given keywords
|
||||
default: `None`
|
||||
|
||||
Time interval in seconds for progress report. Defaults to `15`.
|
||||
- `offset`: `int`
|
||||
The beginning number of records for paging
|
||||
default: `1`
|
||||
|
||||
#### timeout: `int`
|
||||
|
||||
Timeout in seconds. Defaults to `3600`.
|
||||
- `limit`: `int`
|
||||
Records number to return
|
||||
default: `30`
|
||||
|
||||
- `id`: `str`
|
||||
The ID of the chunk to be retrieved
|
||||
default: `None`
|
||||
### Returns
|
||||
List[chunk]
|
||||
|
||||
iteral[Tuple[float, str]]
|
||||
### Examples
|
||||
```python
|
||||
from ragflow import RAGFlow
|
||||
|
||||
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
||||
ds = rag.list_datasets("123")
|
||||
ds = ds[0]
|
||||
ds.async_parse_documents(["wdfxb5t547d"])
|
||||
for c in doc.list_chunks(keywords="rag", offset=0, limit=12):
|
||||
print(c)
|
||||
```

## Add chunk

```python
@@ -587,6 +545,9 @@ Document.add_chunk(content:str) -> Chunk
```

### Parameters

#### content: `str`, *Required*

Contains the main text or information of the chunk.

#### important_keywords: `List[str]`

Lists the key terms or phrases that are significant or central to the chunk's content.

### Returns

@@ -598,7 +559,10 @@ chunk

### Examples

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
doc = rag.get_document(id="wdfxb5t547d")
ds = rag.list_datasets(id="123")
ds = ds[0]
doc = ds.list_documents(id="wdfxb5t547d")
doc = doc[0]
chunk = doc.add_chunk(content="xxxxxxx")
```

@@ -607,12 +571,15 @@ chunk = doc.add_chunk(content="xxxxxxx")

## Delete chunk

```python
Chunk.delete() -> bool
Document.delete_chunks(chunk_ids: List[str])
```

### Parameters

#### chunk_ids: `List[str]`

The list of IDs of the chunks to delete.

### Returns

bool

No return value.

### Examples

@@ -620,22 +587,34 @@ bool

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
doc = rag.get_document(id="wdfxb5t547d")
ds = rag.list_datasets(id="123")
ds = ds[0]
doc = ds.list_documents(id="wdfxb5t547d")
doc = doc[0]
chunk = doc.add_chunk(content="xxxxxxx")
chunk.delete()
doc.delete_chunks(["id_1", "id_2"])
```
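
A minimal sketch that combines `Document.list_chunks()` with the new `Document.delete_chunks()` to remove every chunk matching a keyword; it assumes `doc` is the `Document` obtained as in the example above:

```python
# Collect the IDs of chunks whose content matches "test", then delete them in one call.
stale_ids = [c.id for c in doc.list_chunks(keywords="test", offset=0, limit=-1)]
if stale_ids:
    doc.delete_chunks(stale_ids)
```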

---

## Save chunk contents

## Update chunk

```python
Chunk.save() -> bool
Chunk.update(update_message: dict)
```

### Parameters

- `content`: `str`

  Contains the main text or information of the chunk.

- `important_keywords`: `List[str]`

  Lists the key terms or phrases that are significant or central to the chunk's content.

- `available`: `int`

  Indicates the availability status: `0` means unavailable and `1` means available.

### Returns

bool

No return value.

### Examples

@@ -643,10 +622,12 @@ bool

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
doc = rag.get_document(id="wdfxb5t547d")
ds = rag.list_datasets(id="123")
ds = ds[0]
doc = ds.list_documents(id="wdfxb5t547d")
doc = doc[0]
chunk = doc.add_chunk(content="xxxxxxx")
chunk.content = "sdfx"
chunk.save()
chunk.update({"content": "sdfx..."})
```
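
A minimal sketch of a fuller `update_message`, assuming all three fields listed under Parameters can be sent in a single call; `chunk` is the `Chunk` from the example above:

```python
chunk.update({
    "content": "Updated chunk text",
    "important_keywords": ["ragflow", "chunk"],
    "available": 1,  # 1 = available, 0 = unavailable
})
```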

---

@@ -654,7 +635,7 @@ chunk.save()

## Retrieval

```python
RAGFlow.retrieval(question:str, datasets:List[Dataset], document=List[Document]=None, offset:int=0, limit:int=6, similarity_threshold:float=0.1, vector_similarity_weight:float=0.3, top_k:int=1024) -> List[Chunk]
RAGFlow.retrieve(question:str="", datasets:List[str]=None, document=List[str]=None, offset:int=1, limit:int=30, similarity_threshold:float=0.2, vector_similarity_weight:float=0.3, top_k:int=1024, rerank_id:str=None, keyword:bool=False, highlight:bool=False) -> List[Chunk]
```

### Parameters

@@ -691,6 +672,15 @@ The weight of vector cosine similarity, 1 - x is the term similarity weight. Def

Number of records engaged in the vector cosine computation. Defaults to `1024`.

#### rerank_id: `str`

ID of the rerank model. Defaults to `None`.

#### keyword: `bool`

Indicates whether keyword-based matching is enabled (`True`) or disabled (`False`).

#### highlight: `bool`

Specifies whether to enable highlighting of matched terms in the results (`True`) or not (`False`).

### Returns

List[Chunk]

### Examples

@@ -701,18 +691,17 @@ List[Chunk]

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
ds = rag.get_dataset(name="ragflow")
ds = rag.list_datasets(name="ragflow")
ds = ds[0]
name = 'ragflow_test.txt'
path = 'test_data/ragflow_test.txt'
path = './test_data/ragflow_test.txt'
rag.create_document(ds, name=name, blob=open(path, "rb").read())
doc = rag.get_document(name=name)
doc.async_parse()
# Wait for parsing to complete
for progress, msg in doc.join(interval=5, timeout=30):
    print(progress, msg)
for c in rag.retrieval(question="What's ragflow?",
                       datasets=[ds], documents=[doc],
                       offset=0, limit=6, similarity_threshold=0.1,
doc = ds.list_documents(name=name)
doc = doc[0]
ds.async_parse_documents([doc.id])
for c in rag.retrieve(question="What's ragflow?",
                      datasets=[ds.id], documents=[doc.id],
                      offset=1, limit=30, similarity_threshold=0.2,
                      vector_similarity_weight=0.3,
                      top_k=1024
                      ):
    print(c)
```
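
The `rerank_id` and `keyword` parameters introduced by this refactor are not exercised above. A minimal sketch, assuming a dataset named `ragflow` already contains parsed documents and `my_rerank_model` is a placeholder model ID:

```python
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
ds = rag.list_datasets(name="ragflow")[0]
for c in rag.retrieve(question="What's ragflow?",
                      datasets=[ds.id],
                      similarity_threshold=0.2,
                      rerank_id="my_rerank_model",  # placeholder rerank model ID
                      keyword=True):
    print(c.content)
```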

@@ -17,32 +17,11 @@ class Chunk(Base):
                res_dict.pop(k)
        super().__init__(rag, res_dict)

    def delete(self) -> bool:
        """
        Delete the chunk in the document.
        """
        res = self.post('/doc/chunk/rm',
                        {"document_id": self.document_id, 'chunk_ids': [self.id]})
        res = res.json()
        if res.get("retmsg") == "success":
            return True
        raise Exception(res["retmsg"])

    def save(self) -> bool:
        """
        Save the document details to the server.
        """
        res = self.post('/doc/chunk/set',
                        {"chunk_id": self.id,
                         "knowledgebase_id": self.knowledgebase_id,
                         "name": self.document_name,
                         "content": self.content,
                         "important_keywords": self.important_keywords,
                         "document_id": self.document_id,
                         "available": self.available,
                         })

    def update(self, update_message: dict):
        res = self.put(f"/dataset/{self.knowledgebase_id}/document/{self.document_id}/chunk/{self.id}", update_message)
        res = res.json()
        if res.get("retmsg") == "success":
            return True
        raise Exception(res["retmsg"])
        if res.get("code") != 0:
            raise Exception(res["message"])

@@ -65,3 +65,14 @@ class DataSet(Base):
        if res.get("code") != 0:
            raise Exception(res["message"])

    def async_parse_documents(self, document_ids):
        res = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        res = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
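        # Note: both helpers target /dataset/<dataset_id>/chunk: post() starts
        # parsing for the given document_ids and rm() cancels it; any non-zero
        # "code" in the response is raised as an exception.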

@@ -1,7 +1,10 @@
import time

from PIL.ImageFile import raise_oserror

from .base import Base
from .chunk import Chunk
from typing import List


class Document(Base):
@@ -29,160 +32,28 @@ class Document(Base):
                res_dict.pop(k)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict) -> bool:
        """
        Save the document details to the server.
        """
        res = self.post(f'/dataset/{self.knowledgebase_id}/info/{self.id}', update_message)
    def list_chunks(self, offset=0, limit=30, keywords="", id: str = None):
        data = {"document_id": self.id, "keywords": keywords, "offset": offset, "limit": limit, "id": id}
        res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])
        if res.get("code") == 0:
            chunks = []
            for data in res["data"].get("chunks"):
                chunk = Chunk(self.rag, data)
                chunks.append(chunk)
            return chunks
        raise Exception(res.get("message"))
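        # Note: the new list_chunks() maps keywords/offset/limit/id onto a GET
        # request to /dataset/<kb_id>/document/<doc_id>/chunk and wraps each
        # entry of data["chunks"] in a Chunk object.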

    def delete(self) -> bool:
        """
        Delete the document from the server.
        """
        res = self.rm('/doc/delete',
                      {"document_id": self.id})
        res = res.json()
        if res.get("retmsg") == "success":
            return True
        raise Exception(res["retmsg"])

    def download(self) -> bytes:
        """
        Download the document content from the server using the Flask API.

        :return: The downloaded document content in bytes.
        """
        # Construct the URL for the API request using the document ID and knowledge base ID
        res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")

        # Check the response status code to ensure the request was successful
        if res.status_code == 200:
            # Return the document content as bytes
            return res.content
        else:
            # Handle the error and raise an exception
            raise Exception(
                f"Failed to download document. Server responded with: {res.status_code}, {res.text}"
            )

    def async_parse(self):
        """
        Initiate document parsing asynchronously without waiting for completion.
        """
        try:
            # Construct request data including document ID and run status (assuming 1 means to run)
            data = {"document_ids": [self.id], "run": 1}

            # Send a POST request to the specified parsing status endpoint to start parsing
            res = self.post(f'/doc/run', data)

            # Check the server response status code
            if res.status_code != 200:
                raise Exception(f"Failed to start async parsing: {res.text}")

            print("Async parsing started successfully.")

        except Exception as e:
            # Catch and handle exceptions
            print(f"Error occurred during async parsing: {str(e)}")
            raise

    import time

    def join(self, interval=5, timeout=3600):
        """
        Wait for the asynchronous parsing to complete and yield parsing progress periodically.

        :param interval: The time interval (in seconds) for progress reports.
        :param timeout: The timeout (in seconds) for the parsing operation.
        :return: An iterator yielding parsing progress and messages.
        """
        start_time = time.time()
        while time.time() - start_time < timeout:
            # Check the parsing status
            res = self.get(f'/doc/{self.id}/status', {"document_ids": [self.id]})
            res_data = res.json()
            data = res_data.get("data", [])

            # Retrieve progress and status message
            progress = data.get("progress", 0)
            progress_msg = data.get("status", "")

            yield progress, progress_msg  # Yield progress and message

            if progress == 100:  # Parsing completed
                break

            time.sleep(interval)

    def cancel(self):
        """
        Cancel the parsing task for the document.
        """
        try:
            # Construct request data, including document ID and action to cancel (assuming 2 means cancel)
            data = {"document_ids": [self.id], "run": 2}

            # Send a POST request to the specified parsing status endpoint to cancel parsing
            res = self.post(f'/doc/run', data)

            # Check the server response status code
            if res.status_code != 200:
                print("Failed to cancel parsing. Server response:", res.text)
            else:
                print("Parsing cancelled successfully.")

        except Exception as e:
            print(f"Error occurred during async parsing cancellation: {str(e)}")
            raise

    def list_chunks(self, page=1, offset=0, limit=12, size=30, keywords="", available_int=None):
        """
        List all chunks associated with this document by calling the external API.

        Args:
            page (int): The page number to retrieve (default 1).
            size (int): The number of chunks per page (default 30).
            keywords (str): Keywords for searching specific chunks (default "").
            available_int (int): Filter for available chunks (optional).

        Returns:
            list: A list of chunks returned from the API.
        """
        data = {
            "document_id": self.id,
            "page": page,
            "size": size,
            "keywords": keywords,
            "offset": offset,
            "limit": limit
        }

        if available_int is not None:
            data["available_int"] = available_int

        res = self.post(f'/doc/chunk/list', data)
        if res.status_code == 200:
            res_data = res.json()
            if res_data.get("retmsg") == "success":
                chunks = []
                for chunk_data in res_data["data"].get("chunks", []):
                    chunk = Chunk(self.rag, chunk_data)
                    chunks.append(chunk)
                return chunks
            else:
                raise Exception(f"Error fetching chunks: {res_data.get('retmsg')}")
        else:
            raise Exception(f"API request failed with status code {res.status_code}")

    def add_chunk(self, content: str):
        res = self.post('/doc/chunk/create', {"document_id": self.id, "content": content})
        if res.status_code == 200:
            res_data = res.json().get("data")
            chunk_data = res_data.get("chunk")
            return Chunk(self.rag, chunk_data)
        else:
            raise Exception(f"Failed to add chunk: {res.status_code} {res.text}")
        res = self.post(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', {"content": content})
        res = res.json()
        if res.get("code") == 0:
            return Chunk(self.rag, res["data"].get("chunk"))
        raise Exception(res.get("message"))

    def delete_chunks(self, ids: List[str]):
        res = self.rm(f"dataset/{self.knowledgebase_id}/document/{self.id}/chunk", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
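        # Note: add_chunk() and delete_chunks() now go through the dataset-scoped
        # endpoint /dataset/<kb_id>/document/<doc_id>/chunk (post() to create,
        # rm() to delete) and raise on any non-zero "code" in the response.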

@@ -15,8 +15,8 @@ class Session(Base):
        for message in self.messages:
            if "reference" in message:
                message.pop("reference")
        res = self.post(f"/chat/{self.chat_id}/session/{self.id}/completion",
                        {"question": question, "stream": True}, stream=stream)
        res = self.post(f"/chat/{self.chat_id}/completion",
                        {"question": question, "stream": True, "session_id": self.id}, stream=stream)
        for line in res.iter_lines():
            line = line.decode("utf-8")
            if line.startswith("{"):

@@ -82,3 +82,4 @@ class Chunk(Base):
        self.term_similarity = None
        self.positions = None
        super().__init__(rag, res_dict)

@@ -158,105 +158,30 @@ class RAGFlow:
            raise Exception(res["message"])


    def async_parse_documents(self, doc_ids):
        """
        Asynchronously start parsing multiple documents without waiting for completion.

        :param doc_ids: A list containing multiple document IDs.
        """
        try:
            if not doc_ids or not isinstance(doc_ids, list):
                raise ValueError("doc_ids must be a non-empty list of document IDs")

            data = {"document_ids": doc_ids, "run": 1}

            res = self.post(f'/doc/run', data)

            if res.status_code != 200:
                raise Exception(f"Failed to start async parsing for documents: {res.text}")

            print(f"Async parsing started successfully for documents: {doc_ids}")

        except Exception as e:
            print(f"Error occurred during async parsing for documents: {str(e)}")
            raise

    def async_cancel_parse_documents(self, doc_ids):
        """
        Cancel the asynchronous parsing of multiple documents.

        :param doc_ids: A list containing multiple document IDs.
        """
        try:
            if not doc_ids or not isinstance(doc_ids, list):
                raise ValueError("doc_ids must be a non-empty list of document IDs")
            data = {"document_ids": doc_ids, "run": 2}
            res = self.post(f'/doc/run', data)

            if res.status_code != 200:
                raise Exception(f"Failed to cancel async parsing for documents: {res.text}")

            print(f"Async parsing canceled successfully for documents: {doc_ids}")

        except Exception as e:
            print(f"Error occurred during canceling parsing for documents: {str(e)}")
            raise

    def retrieval(self,
                  question,
                  datasets=None,
                  documents=None,
                  offset=0,
                  limit=6,
                  similarity_threshold=0.1,
                  vector_similarity_weight=0.3,
                  top_k=1024):
        """
        Perform document retrieval based on the given parameters.

        :param question: The query question.
        :param datasets: A list of datasets (optional, as documents may be provided directly).
        :param documents: A list of documents (if specific documents are provided).
        :param offset: Offset for the retrieval results.
        :param limit: Maximum number of retrieval results.
        :param similarity_threshold: Similarity threshold.
        :param vector_similarity_weight: Weight of vector similarity.
        :param top_k: Number of top most similar documents to consider (for pre-filtering or ranking).

        Note: This is a hypothetical implementation and may need adjustments based on the actual backend service API.
        """
        try:
            data = {
                "question": question,
                "datasets": datasets if datasets is not None else [],
                "documents": [doc.id if hasattr(doc, 'id') else doc for doc in
                              documents] if documents is not None else [],
    def retrieve(self, question="", datasets=None, documents=None, offset=1, limit=30, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id: str = None, keyword: bool = False):
        data_params = {
            "offset": offset,
            "limit": limit,
            "similarity_threshold": similarity_threshold,
            "vector_similarity_weight": vector_similarity_weight,
            "top_k": top_k,
            "knowledgebase_id": datasets,
            "rerank_id": rerank_id,
            "keyword": keyword
        }
        data_json = {
            "question": question,
            "datasets": datasets,
            "documents": documents
        }

            # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
            res = self.post(f'/doc/retrieval_test', data)

            # Check the response status code
            if res.status_code == 200:
                res_data = res.json()
                if res_data.get("retmsg") == "success":
                    chunks = []
                    for chunk_data in res_data["data"].get("chunks", []):
                        chunk = Chunk(self, chunk_data)
                        chunks.append(chunk)
                    return chunks
                else:
                    raise Exception(f"Error fetching chunks: {res_data.get('retmsg')}")
            else:
                raise Exception(f"API request failed with status code {res.status_code}")

        except Exception as e:
            print(f"An error occurred during retrieval: {e}")
            raise
        res = self.get(f'/retrieval', data_params, data_json)
        res = res.json()
        if res.get("code") == 0:
            chunks = []
            for chunk_data in res["data"].get("chunks"):
                chunk = Chunk(self, chunk_data)
                chunks.append(chunk)
            return chunks
        raise Exception(res.get("message"))
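        # Note: the refactored retrieve() issues a GET to /retrieval, sending the
        # paging and similarity settings in data_params and the question, dataset
        # IDs, and document IDs in data_json, then wraps each returned chunk in a
        # Chunk object.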

@@ -63,17 +63,13 @@ class TestDocument(TestSdk):
        # Check if the retrieved document is of type Document
        if isinstance(doc, Document):
            # Download the document content and save it to a file
            try:
                with open("ragflow.txt", "wb+") as file:
                    file.write(doc.download())
                    # Print the document object for debugging
                    print(doc)
            with open("./ragflow.txt", "wb+") as file:
                file.write(doc.download())
                # Print the document object for debugging
                print(doc)

                # Assert that the download was successful
                assert True, "Document downloaded successfully."
            except Exception as e:
                # If an error occurs, raise an assertion error
                assert False, f"Failed to download document, error: {str(e)}"
            # Assert that the download was successful
            assert True, f"Failed to download document, error: {doc}"
        else:
            # If the document retrieval fails, assert failure
            assert False, f"Failed to get document, error: {doc}"

@@ -100,7 +96,7 @@ class TestDocument(TestSdk):
        blob2 = b"Sample document content for ingestion test222."
        list_1 = [{"name": name1, "blob": blob1}, {"name": name2, "blob": blob2}]
        ds.upload_documents(list_1)
        for d in ds.list_docs(keywords="test", offset=0, limit=12):
        for d in ds.list_documents(keywords="test", offset=0, limit=12):
            assert isinstance(d, Document), "Failed to upload documents"

    def test_delete_documents_in_dataset_with_success(self):

@@ -123,16 +119,11 @@ class TestDocument(TestSdk):
        blob1 = b"Sample document content for ingestion test333."
        name2 = "Test Document444.txt"
        blob2 = b"Sample document content for ingestion test444."
        name3 = 'test.txt'
        path = 'test_data/test.txt'
        rag.create_document(ds, name=name3, blob=open(path, "rb").read())
        rag.create_document(ds, name=name1, blob=blob1)
        rag.create_document(ds, name=name2, blob=blob2)
        for d in ds.list_docs(keywords="document", offset=0, limit=12):
        ds.upload_documents([{"name": name1, "blob": blob1}, {"name": name2, "blob": blob2}])
        for d in ds.list_documents(keywords="document", offset=0, limit=12):
            assert isinstance(d, Document)
            d.delete()
            print(d)
        remaining_docs = ds.list_docs(keywords="rag", offset=0, limit=12)
            ds.delete_documents([d.id])
        remaining_docs = ds.list_documents(keywords="rag", offset=0, limit=12)
        assert len(remaining_docs) == 0, "Documents were not properly deleted."

    def test_parse_and_cancel_document(self):

@@ -144,16 +135,15 @@ class TestDocument(TestSdk):

        # Define the document name and path
        name3 = 'westworld.pdf'
        path = 'test_data/westworld.pdf'
        path = './test_data/westworld.pdf'

        # Create a document in the dataset using the file path
        rag.create_document(ds, name=name3, blob=open(path, "rb").read())
        ds.upload_documents({"name": name3, "blob": open(path, "rb").read()})

        # Retrieve the document by name
        doc = rag.get_document(name="westworld.pdf")

        # Initiate asynchronous parsing
        doc.async_parse()
        doc = rag.list_documents(name="westworld.pdf")
        doc = doc[0]
        ds.async_parse_documents(document_ids=[])

        # Print message to confirm asynchronous parsing has been initiated
        print("Async parsing initiated")