diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index c5a7c4991..551f9f847 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -35,7 +35,7 @@ from api.db.services.llm_service import LLMBundle @manager.route('/chats//sessions', methods=['POST']) @token_required -def create(tenant_id,chat_id): +def create(tenant_id, chat_id): req = request.json req["dialog_id"] = chat_id dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value) @@ -77,7 +77,7 @@ def create_agent_session(tenant_id, agent_id): conv = { "id": get_uuid(), "dialog_id": cvs.id, - "user_id": req.get("usr_id","") if isinstance(req, dict) else "", + "user_id": req.get("usr_id", "") if isinstance(req, dict) else "", "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent" } @@ -88,11 +88,11 @@ def create_agent_session(tenant_id, agent_id): @manager.route('/chats//sessions/', methods=['PUT']) @token_required -def update(tenant_id,chat_id,session_id): +def update(tenant_id, chat_id, session_id): req = request.json req["dialog_id"] = chat_id conv_id = session_id - conv = ConversationService.query(id=conv_id,dialog_id=chat_id) + conv = ConversationService.query(id=conv_id, dialog_id=chat_id) if not conv: return get_error_data_result(message="Session does not exist") if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value): @@ -123,12 +123,12 @@ def completion(tenant_id, chat_id): return get_error_data_result(message="`name` can not be empty.") ConversationService.save(**conv) e, conv = ConversationService.get_by_id(conv["id"]) - session_id=conv.id + session_id = conv.id else: session_id = req.get("session_id") if not req.get("question"): return get_error_data_result(message="Please input your question.") - conv = ConversationService.query(id=session_id,dialog_id=chat_id) + conv = ConversationService.query(id=session_id, dialog_id=chat_id) if not conv: return get_error_data_result(message="Session does not exist") conv = conv[0] @@ -182,18 +182,18 @@ def completion(tenant_id, chat_id): chunk_list.append(new_chunk) reference["chunks"] = chunk_list ans["id"] = message_id - ans["session_id"]=session_id + ans["session_id"] = session_id def stream(): nonlocal dia, msg, req, conv try: for ans in chat(dia, msg, **req): fillin_conv(ans) - yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n" + yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n" ConversationService.update_by_id(conv.id, conv.to_dict()) except Exception as e: yield "data:" + json.dumps({"code": 500, "message": str(e), - "data": {"answer": "**ERROR**: " + str(e),"reference": []}}, + "data": {"answer": "**ERROR**: " + str(e), "reference": []}}, ensure_ascii=False) + "\n\n" yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n" @@ -235,7 +235,7 @@ def agent_completion(tenant_id, agent_id): conv = { "id": session_id, "dialog_id": cvs.id, - "user_id": req.get("user_id",""), + "user_id": req.get("user_id", ""), "message": [{"role": "assistant", "content": canvas.get_prologue()}], "source": "agent" } @@ -251,9 +251,9 @@ def agent_completion(tenant_id, agent_id): question = req.get("question") if not question: return get_error_data_result("`question` is required.") - question={ - "role":"user", - "content":question, + question = { + "role": "user", + "content": question, "id": str(uuid4()) } messages.append(question) @@ -375,7 +375,7 @@ def agent_completion(tenant_id, agent_id): @manager.route('/chats//sessions', methods=['GET']) @token_required -def list_session(chat_id,tenant_id): +def list_session(chat_id, tenant_id): if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value): return get_error_data_result(message=f"You don't own the assistant {chat_id}.") id = request.args.get("id") @@ -387,7 +387,7 @@ def list_session(chat_id,tenant_id): desc = False else: desc = True - convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name) + convs = ConversationService.get_list(chat_id, page_number, items_per_page, orderby, desc, id, name) if not convs: return get_result(data=[]) for conv in convs: @@ -429,7 +429,7 @@ def list_session(chat_id,tenant_id): @manager.route('/chats//sessions', methods=["DELETE"]) @token_required -def delete(tenant_id,chat_id): +def delete(tenant_id, chat_id): if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value): return get_error_data_result(message="You don't own the chat") req = request.json @@ -437,21 +437,22 @@ def delete(tenant_id,chat_id): if not req: ids = None else: - ids=req.get("ids") + ids = req.get("ids") if not ids: conv_list = [] for conv in convs: conv_list.append(conv.id) else: - conv_list=ids + conv_list = ids for id in conv_list: - conv = ConversationService.query(id=id,dialog_id=chat_id) + conv = ConversationService.query(id=id, dialog_id=chat_id) if not conv: return get_error_data_result(message="The chat doesn't own the session") ConversationService.delete_by_id(id) return get_result() + @manager.route('/sessions/ask', methods=['POST']) @token_required def ask_about(tenant_id): @@ -460,17 +461,18 @@ def ask_about(tenant_id): return get_error_data_result("`question` is required.") if not req.get("dataset_ids"): return get_error_data_result("`dataset_ids` is required.") - if not isinstance(req.get("dataset_ids"),list): + if not isinstance(req.get("dataset_ids"), list): return get_error_data_result("`dataset_ids` should be a list.") - req["kb_ids"]=req.pop("dataset_ids") + req["kb_ids"] = req.pop("dataset_ids") for kb_id in req["kb_ids"]: - if not KnowledgebaseService.accessible(kb_id,tenant_id): + if not KnowledgebaseService.accessible(kb_id, tenant_id): return get_error_data_result(f"You don't own the dataset {kb_id}.") kbs = KnowledgebaseService.query(id=kb_id) kb = kbs[0] if kb.chunk_num == 0: return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file") uid = tenant_id + def stream(): nonlocal req, uid try: diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 9c6139469..ae663ac3c 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -152,7 +152,7 @@ class RAGFlowPdfParser: max(len(up["text"]), len(down["text"])), len(tks_all) - len(tks_up) - len(tks_down), len(tks_down) - len(tks_up), - tks_down[-1] == tks_up[-1], + tks_down[-1] == tks_up[-1] if tks_down and tks_up else False, max(down["in_row"], up["in_row"]), abs(down["in_row"] - up["in_row"]), len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,