rename some attributes in document sdk (#2481)

### What problem does this PR solve?

#1102
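
In short, the SDK and its HTTP endpoints move from internal column names to user-facing ones. As a reading aid, here is the old-to-new key mapping collected from the hunks below (a reviewer's summary sketch, not code from this PR):

```python
# Request/response keys renamed by this PR (old -> new), collected from the diff below.
FIELD_RENAMES = {
    "doc_id": "document_id",
    "doc_ids": "document_ids",
    "kb_id": "knowledgebase_id",
    "chunk_num": "chunk_count",
    "token_num": "token_count",
    "parser_id": "parser_method",
    "content_with_weight": "content",
    "important_kwd": "important_keywords",
    "docnm_kwd": "document_keyword",
}
```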

### Type of change

- [x] Performance Improvement

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
Authored by JobSmithManipulation on 2024-09-18 18:46:37 +08:00; committed by GitHub.
parent 01acc3fd5a
commit 2b0dc01a88
6 changed files with 66 additions and 62 deletions

Changes to the server-side document and chunk API routes:

```diff
@@ -99,6 +99,7 @@ def docinfos(tenant_id):
         "chunk_num": "chunk_count",
         "kb_id": "knowledgebase_id",
         "token_num": "token_count",
+        "parser_id":"parser_method",
     }
     renamed_doc = {}
     for key, value in doc.to_dict().items():
@@ -125,10 +126,14 @@ def save_doc(tenant_id):
     if not e:
         return get_data_error_result(retmsg="Document not found!")
     #other value can't be changed
-    if "chunk_num" in req:
-        if req["chunk_num"] != doc.chunk_num:
+    if "chunk_count" in req:
+        if req["chunk_count"] != doc.chunk_num:
             return get_data_error_result(
                 retmsg="Can't change chunk_count.")
+    if "token_count" in req:
+        if req["token_count"] != doc.token_num:
+            return get_data_error_result(
+                retmsg="Can't change token_count.")
     if "progress" in req:
         if req['progress'] != doc.progress:
             return get_data_error_result(
@@ -158,9 +163,9 @@ def save_doc(tenant_id):
                 FileService.update_by_id(file.id, {"name": req["name"]})
             except Exception as e:
                 return server_error_response(e)
-    if "parser_id" in req:
+    if "parser_method" in req:
         try:
-            if doc.parser_id.lower() == req["parser_id"].lower():
+            if doc.parser_id.lower() == req["parser_method"].lower():
                 if "parser_config" in req:
                     if req["parser_config"] == doc.parser_config:
                         return get_json_result(data=True)
@@ -172,7 +177,7 @@ def save_doc(tenant_id):
                 return get_data_error_result(retmsg="Not supported yet!")

             e = DocumentService.update_by_id(doc.id,
-                                             {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "",
+                                             {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
                                               "run": TaskStatus.UNSTART.value})
             if not e:
                 return get_data_error_result(retmsg="Document not found!")
@@ -183,7 +188,7 @@ def save_doc(tenant_id):
                                       doc.process_duation * -1)
             if not e:
                 return get_data_error_result(retmsg="Document not found!")
-            tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+            tenant_id = DocumentService.get_tenant_id(req["id"])
             if not tenant_id:
                 return get_data_error_result(retmsg="Tenant not found!")
             ELASTICSEARCH.deleteByQuery(
@@ -272,7 +277,7 @@ def rename():

 @manager.route("/<document_id>", methods=["GET"])
 @token_required
-def download_document(dataset_id, document_id,tenant_id):
+def download_document(document_id,tenant_id):
     try:
         # Check whether there is this document
         exist, document = DocumentService.get_by_id(document_id)
@@ -304,7 +309,7 @@ def download_document(document_id,tenant_id):
 @manager.route('/dataset/<dataset_id>/documents', methods=['GET'])
 @token_required
 def list_docs(dataset_id, tenant_id):
-    kb_id = request.args.get("kb_id")
+    kb_id = request.args.get("knowledgebase_id")
     if not kb_id:
         return get_json_result(
             data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR)
@@ -334,6 +339,7 @@ def list_docs(dataset_id, tenant_id):
             "chunk_num": "chunk_count",
             "kb_id": "knowledgebase_id",
             "token_num": "token_count",
+            "parser_id":"parser_method"
         }
         renamed_doc = {}
         for key, value in doc.items():
@@ -349,10 +355,10 @@ def list_docs(dataset_id, tenant_id):
 @token_required
 def rm(tenant_id):
     req = request.args
-    if "doc_id" not in req:
+    if "document_id" not in req:
         return get_data_error_result(
             retmsg="doc_id is required")
-    doc_ids = req["doc_id"]
+    doc_ids = req["document_id"]
     if isinstance(doc_ids, str): doc_ids = [doc_ids]
     root_folder = FileService.get_root_folder(tenant_id)
     pf_id = root_folder["id"]
@@ -413,7 +419,7 @@ def show_parsing_status(tenant_id, document_id):
 def run(tenant_id):
     req = request.json
     try:
-        for id in req["doc_ids"]:
+        for id in req["document_ids"]:
             info = {"run": str(req["run"]), "progress": 0}
             if str(req["run"]) == TaskStatus.RUNNING.value:
                 info["progress_msg"] = ""
@@ -442,15 +448,15 @@ def run(tenant_id):

 @manager.route('/chunk/list', methods=['POST'])
 @token_required
-@validate_request("doc_id")
+@validate_request("document_id")
 def list_chunk(tenant_id):
     req = request.json
-    doc_id = req["doc_id"]
+    doc_id = req["document_id"]
     page = int(req.get("page", 1))
     size = int(req.get("size", 30))
     question = req.get("keywords", "")
     try:
-        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        tenant_id = DocumentService.get_tenant_id(req["document_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
         e, doc = DocumentService.get_by_id(doc_id)
@@ -509,15 +515,15 @@ def list_chunk(tenant_id):

 @manager.route('/chunk/create', methods=['POST'])
 @token_required
-@validate_request("doc_id", "content_with_weight")
+@validate_request("document_id", "content")
 def create(tenant_id):
     req = request.json
     md5 = hashlib.md5()
-    md5.update((req["content_with_weight"] + req["doc_id"]).encode("utf-8"))
+    md5.update((req["content"] + req["document_id"]).encode("utf-8"))
     chunk_id = md5.hexdigest()
-    d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content_with_weight"]),
-         "content_with_weight": req["content_with_weight"]}
+    d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content"]),
+         "content_with_weight": req["content"]}
     d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
     d["important_kwd"] = req.get("important_kwd", [])
     d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
@@ -525,22 +531,22 @@ def create(tenant_id):
     d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
     try:
-        e, doc = DocumentService.get_by_id(req["doc_id"])
+        e, doc = DocumentService.get_by_id(req["document_id"])
         if not e:
             return get_data_error_result(retmsg="Document not found!")
         d["kb_id"] = [doc.kb_id]
         d["docnm_kwd"] = doc.name
         d["doc_id"] = doc.id
-        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        tenant_id = DocumentService.get_tenant_id(req["document_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
-        embd_id = DocumentService.get_embd_id(req["doc_id"])
+        embd_id = DocumentService.get_embd_id(req["document_id"])
         embd_mdl = TenantLLMService.model_instance(
             tenant_id, LLMType.EMBEDDING.value, embd_id)
-        v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
+        v, c = embd_mdl.encode([doc.name, req["content"]])
         v = 0.1 * v[0] + 0.9 * v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
         ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
@@ -568,14 +574,14 @@ def create(tenant_id):

 @manager.route('/chunk/rm', methods=['POST'])
 @token_required
-@validate_request("chunk_ids", "doc_id")
+@validate_request("chunk_ids", "document_id")
 def rm_chunk(tenant_id):
     req = request.json
     try:
         if not ELASTICSEARCH.deleteByQuery(
                 Q("ids", values=req["chunk_ids"]), search.index_name(tenant_id)):
             return get_data_error_result(retmsg="Index updating failure")
-        e, doc = DocumentService.get_by_id(req["doc_id"])
+        e, doc = DocumentService.get_by_id(req["document_id"])
         if not e:
             return get_data_error_result(retmsg="Document not found!")
         deleted_chunk_ids = req["chunk_ids"]
@@ -587,30 +593,30 @@ def rm_chunk(tenant_id):

 @manager.route('/chunk/set', methods=['POST'])
 @token_required
-@validate_request("doc_id", "chunk_id", "content_with_weight",
-                  "important_kwd")
+@validate_request("document_id", "chunk_id", "content",
+                  "important_keywords")
 def set(tenant_id):
     req = request.json
     d = {
         "id": req["chunk_id"],
-        "content_with_weight": req["content_with_weight"]}
-    d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"])
+        "content_with_weight": req["content"]}
+    d["content_ltks"] = rag_tokenizer.tokenize(req["content"])
     d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
-    d["important_kwd"] = req["important_kwd"]
-    d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_kwd"]))
+    d["important_kwd"] = req["important_keywords"]
+    d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
     if "available_int" in req:
         d["available_int"] = req["available_int"]
     try:
-        tenant_id = DocumentService.get_tenant_id(req["doc_id"])
+        tenant_id = DocumentService.get_tenant_id(req["document_id"])
         if not tenant_id:
             return get_data_error_result(retmsg="Tenant not found!")
-        embd_id = DocumentService.get_embd_id(req["doc_id"])
+        embd_id = DocumentService.get_embd_id(req["document_id"])
         embd_mdl = TenantLLMService.model_instance(
             tenant_id, LLMType.EMBEDDING.value, embd_id)
-        e, doc = DocumentService.get_by_id(req["doc_id"])
+        e, doc = DocumentService.get_by_id(req["document_id"])
         if not e:
             return get_data_error_result(retmsg="Document not found!")
@@ -618,7 +624,7 @@ def set(tenant_id):
         arr = [
             t for t in re.split(
                 r"[\n\t]",
-                req["content_with_weight"]) if len(t) > 1]
+                req["content"]) if len(t) > 1]
         if len(arr) != 2:
             return get_data_error_result(
                 retmsg="Q&A must be separated by TAB/ENTER key.")
@@ -626,7 +632,7 @@ def set(tenant_id):
         d = beAdoc(d, arr[0], arr[1], not any(
             [rag_tokenizer.is_chinese(t) for t in q + a]))
-        v, c = embd_mdl.encode([doc.name, req["content_with_weight"]])
+        v, c = embd_mdl.encode([doc.name, req["content"]])
         v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
         d["q_%d_vec" % len(v)] = v.tolist()
         ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
@@ -636,13 +642,13 @@ def set(tenant_id):

 @manager.route('/retrieval_test', methods=['POST'])
 @token_required
-@validate_request("kb_id", "question")
+@validate_request("knowledgebase_id", "question")
 def retrieval_test(tenant_id):
     req = request.json
     page = int(req.get("page", 1))
     size = int(req.get("size", 30))
     question = req["question"]
-    kb_id = req["kb_id"]
+    kb_id = req["knowledgebase_id"]
     if isinstance(kb_id, str): kb_id = [kb_id]
     doc_ids = req.get("doc_ids", [])
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
@@ -693,6 +699,7 @@ def retrieval_test(tenant_id):
             "content_with_weight": "content",
             "doc_id": "document_id",
             "important_kwd": "important_keywords",
+            "docnm_kwd":"document_keyword"
         }
         rename_chunk={}
         for key, value in chunk.items():
```
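
To see the renames from the client side, here is a minimal request sketch against the `/doc/chunk/list` route above. The base URL, bearer token, and document id are placeholders; only the path and the JSON keys come from the diff:

```python
import requests

BASE_URL = "http://localhost:9380/v1"  # placeholder: your RAGFlow API root
API_KEY = "YOUR_API_KEY"               # placeholder: token checked by @token_required

# The endpoint now validates "document_id" where it previously expected "doc_id".
res = requests.post(
    f"{BASE_URL}/doc/chunk/list",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"document_id": "<document-id>", "page": 1, "size": 30, "keywords": ""},
)
print(res.json())
```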

Changes to the `Chunk` class in the Python SDK:

```diff
@@ -22,7 +22,7 @@ class Chunk(Base):
         Delete the chunk in the document.
         """
         res = self.post('/doc/chunk/rm',
-                        {"doc_id": self.document_id, 'chunk_ids': [self.id]})
+                        {"document_id": self.document_id, 'chunk_ids': [self.id]})
         res = res.json()
         if res.get("retmsg") == "success":
             return True
@@ -34,13 +34,13 @@ class Chunk(Base):
         """
         res = self.post('/doc/chunk/set',
                         {"chunk_id": self.id,
-                         "kb_id": self.knowledgebase_id,
+                         "knowledgebase_id": self.knowledgebase_id,
                          "name": self.document_name,
-                         "content_with_weight": self.content,
-                         "important_kwd": self.important_keywords,
+                         "content": self.content,
+                         "important_keywords": self.important_keywords,
                          "create_time": self.create_time,
                          "create_timestamp_flt": self.create_timestamp_float,
-                         "doc_id": self.document_id,
+                         "document_id": self.document_id,
                          "status": self.status,
                          })
         res = res.json()
```
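
With these renames, a `Chunk` round-trip through the SDK speaks the new vocabulary end to end. A hedged usage sketch, assuming the package imports as `ragflow` and a server is reachable at the placeholder address; the `save()`/`delete()` method names are inferred from the hunks, and the `get_document`/`add_chunk` calls mirror the tests at the end of this diff:

```python
from ragflow import RAGFlow  # assumed import path for the Python SDK

rag = RAGFlow("YOUR_API_KEY", "http://localhost:9380")  # placeholders
doc = rag.get_document(name="story.txt")

chunk = doc.add_chunk(content="hello ragflow")  # posts "content", not "content_with_weight"
chunk.content = "ragflow123"
chunk.save()    # payload now carries "document_id", "knowledgebase_id", "important_keywords"
chunk.delete()  # posts {"document_id": ..., "chunk_ids": [chunk.id]}
```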

Changes to the `DataSet` class in the Python SDK:

```diff
@@ -65,7 +65,7 @@ class DataSet(Base):
         """
         # Construct the request payload for listing documents
         payload = {
-            "kb_id": self.id,
+            "knowledgebase_id": self.id,
             "keywords": keywords,
             "offset": offset,
             "limit": limit
```

Changes to the `Document` class in the Python SDK:

```diff
@@ -34,10 +34,10 @@ class Document(Base):
         Save the document details to the server.
         """
         res = self.post('/doc/save',
-                        {"id": self.id, "name": self.name, "thumbnail": self.thumbnail, "kb_id": self.knowledgebase_id,
-                         "parser_id": self.parser_method, "parser_config": self.parser_config.to_json(),
+                        {"id": self.id, "name": self.name, "thumbnail": self.thumbnail, "knowledgebase_id": self.knowledgebase_id,
+                         "parser_method": self.parser_method, "parser_config": self.parser_config.to_json(),
                          "source_type": self.source_type, "type": self.type, "created_by": self.created_by,
-                         "size": self.size, "token_num": self.token_count, "chunk_num": self.chunk_count,
+                         "size": self.size, "token_count": self.token_count, "chunk_count": self.chunk_count,
                          "progress": self.progress, "progress_msg": self.progress_msg,
                          "process_begin_at": self.process_begin_at, "process_duation": self.process_duration
                          })
@@ -51,7 +51,7 @@ class Document(Base):
         Delete the document from the server.
         """
         res = self.rm('/doc/delete',
-                      {"doc_id": self.id})
+                      {"document_id": self.id})
         res = res.json()
         if res.get("retmsg") == "success":
             return True
@@ -83,7 +83,7 @@ class Document(Base):
         """
         try:
             # Construct request data including document ID and run status (assuming 1 means to run)
-            data = {"doc_ids": [self.id], "run": 1}
+            data = {"document_ids": [self.id], "run": 1}

             # Send a POST request to the specified parsing status endpoint to start parsing
             res = self.post(f'/doc/run', data)
@@ -112,7 +112,7 @@ class Document(Base):
         start_time = time.time()
         while time.time() - start_time < timeout:
             # Check the parsing status
-            res = self.get(f'/doc/{self.id}/status', {"doc_ids": [self.id]})
+            res = self.get(f'/doc/{self.id}/status', {"document_ids": [self.id]})
             res_data = res.json()
             data = res_data.get("data", [])
@@ -133,7 +133,7 @@ class Document(Base):
         """
         try:
             # Construct request data, including document ID and action to cancel (assuming 2 means cancel)
-            data = {"doc_ids": [self.id], "run": 2}
+            data = {"document_ids": [self.id], "run": 2}

             # Send a POST request to the specified parsing status endpoint to cancel parsing
             res = self.post(f'/doc/run', data)
@@ -162,7 +162,7 @@ class Document(Base):
             list: A list of chunks returned from the API.
         """
         data = {
-            "doc_id": self.id,
+            "document_id": self.id,
             "page": page,
             "size": size,
             "keywords": keywords,
@@ -188,7 +188,7 @@ class Document(Base):
             raise Exception(f"API request failed with status code {res.status_code}")

     def add_chunk(self, content: str):
-        res = self.post('/doc/chunk/create', {"doc_id": self.id, "content_with_weight":content})
+        res = self.post('/doc/chunk/create', {"document_id": self.id, "content":content})
         if res.status_code == 200:
             res_data = res.json().get("data")
             chunk_data = res_data.get("chunk")
```
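
Taken together, the `Document` changes align the client attributes (`token_count`, `chunk_count`, `parser_method`) with the wire format. A hedged end-to-end sketch with placeholder credentials; `"naive"` is an assumed parser name used purely for illustration:

```python
from ragflow import RAGFlow  # assumed import path

rag = RAGFlow("YOUR_API_KEY", "http://localhost:9380")  # placeholders
doc = rag.get_document(name="story.txt")

doc.parser_method = "naive"  # assumed parser name; now serialized as "parser_method", not "parser_id"
doc.save()                   # body uses "token_count"/"chunk_count"/"knowledgebase_id"

chunk = doc.add_chunk(content="a small test chunk")  # sends {"document_id": ..., "content": ...}
```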

Changes to the `RAGFlow` client class in the Python SDK:

```diff
@@ -150,14 +150,11 @@ class RAGFlow:
         files = {
             'file': (name, blob)
         }
-        data = {
-            'kb_id': ds.id
-        }
         headers = {
             'Authorization': f"Bearer {ds.rag.user_key}"
         }

-        response = requests.post(self.api_url + url, data=data, files=files,
+        response = requests.post(self.api_url + url, files=files,
                                  headers=headers)
         if response.status_code == 200 and response.json().get('retmsg') == 'success':
@@ -184,7 +181,7 @@ class RAGFlow:
         if not doc_ids or not isinstance(doc_ids, list):
             raise ValueError("doc_ids must be a non-empty list of document IDs")
-        data = {"doc_ids": doc_ids, "run": 1}
+        data = {"document_ids": doc_ids, "run": 1}

         res = self.post(f'/doc/run', data)
@@ -206,7 +203,7 @@ class RAGFlow:
         try:
             if not doc_ids or not isinstance(doc_ids, list):
                 raise ValueError("doc_ids must be a non-empty list of document IDs")
-            data = {"doc_ids": doc_ids, "run": 2}
+            data = {"document_ids": doc_ids, "run": 2}
             res = self.post(f'/doc/run', data)
             if res.status_code != 200:
@@ -252,7 +249,7 @@ class RAGFlow:
             "similarity_threshold": similarity_threshold,
             "vector_similarity_weight": vector_similarity_weight,
             "top_k": top_k,
-            "kb_id": datasets,
+            "knowledgebase_id": datasets,
         }
         # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
```
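
On the retrieval path, `/doc/retrieval_test` now validates `knowledgebase_id` instead of `kb_id`. A hedged request sketch; the URL, token, dataset id, and numeric values are placeholders, while the key names match the payload assembled above:

```python
import requests

BASE_URL = "http://localhost:9380/v1"  # placeholder API root
API_KEY = "YOUR_API_KEY"               # placeholder token

payload = {
    "question": "What is RAGFlow?",
    "knowledgebase_id": ["<dataset-id>"],  # formerly "kb_id"; a single id string is also accepted
    "similarity_threshold": 0.2,
    "vector_similarity_weight": 0.3,
    "top_k": 1024,
}
res = requests.post(f"{BASE_URL}/doc/retrieval_test",
                    headers={"Authorization": f"Bearer {API_KEY}"},
                    json=payload)
print(res.json())  # matched chunks come back as "content", "document_id", "document_keyword", ...
```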

Changes to the SDK test suite:

```diff
@@ -255,14 +255,14 @@ class TestDocument(TestSdk):
     def test_add_chunk_to_chunk_list(self):
         rag = RAGFlow(API_KEY, HOST_ADDRESS)
         doc = rag.get_document(name='story.txt')
-        chunk = doc.add_chunk(content="assss")
+        chunk = doc.add_chunk(content="assssdd")
         assert chunk is not None, "Chunk is None"
         assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"

     def test_delete_chunk_of_chunk_list(self):
         rag = RAGFlow(API_KEY, HOST_ADDRESS)
         doc = rag.get_document(name='story.txt')
-        chunk = doc.add_chunk(content="assss")
+        chunk = doc.add_chunk(content="assssdd")
         assert chunk is not None, "Chunk is None"
         assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
         doc = rag.get_document(name='story.txt')
@@ -274,7 +274,7 @@ class TestDocument(TestSdk):
     def test_update_chunk_content(self):
         rag = RAGFlow(API_KEY, HOST_ADDRESS)
         doc = rag.get_document(name='story.txt')
-        chunk = doc.add_chunk(content="assssd")
+        chunk = doc.add_chunk(content="assssddd")
         assert chunk is not None, "Chunk is None"
         assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
         chunk.content = "ragflow123"
```