From 6874c6f3a77f65b03b4de2ec370c84da17789763 Mon Sep 17 00:00:00 2001 From: KevinHuSh Date: Mon, 29 Apr 2024 15:45:08 +0800 Subject: [PATCH] refine document upload (#602) ### What problem does this PR solve? #567 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/document_app.py | 100 ++++++++++++++++++++------------------- docker/entrypoint.sh | 3 +- 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/api/apps/document_app.py b/api/apps/document_app.py index d5551d747..08af7b1cf 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -51,55 +51,59 @@ def upload(): if 'file' not in request.files: return get_json_result( data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) - file = request.files['file'] - if file.filename == '': + + file_objs = request.files.getlist('file') + for file_obj in file_objs: + if file_obj.filename == '': + return get_json_result( + data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) + + err = [] + for file in file_objs: + try: + e, kb = KnowledgebaseService.get_by_id(kb_id) + if not e: + raise LookupError("Can't find this knowledgebase!") + MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0)) + if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: + raise RuntimeError("Exceed the maximum file number of a free user!") + + filename = duplicate_name( + DocumentService.query, + name=file.filename, + kb_id=kb.id) + filetype = filename_type(filename) + if filetype == FileType.OTHER.value: + raise RuntimeError("This type of file has not been supported yet!") + + location = filename + while MINIO.obj_exist(kb_id, location): + location += "_" + blob = file.read() + MINIO.put(kb_id, location, blob) + doc = { + "id": get_uuid(), + "kb_id": kb.id, + "parser_id": kb.parser_id, + "parser_config": kb.parser_config, + "created_by": current_user.id, + "type": filetype, + "name": filename, + "location": location, + "size": len(blob), + "thumbnail": thumbnail(filename, blob) + } + if doc["type"] == FileType.VISUAL: + doc["parser_id"] = ParserType.PICTURE.value + if re.search(r"\.(ppt|pptx|pages)$", filename): + doc["parser_id"] = ParserType.PRESENTATION.value + DocumentService.insert(doc) + except Exception as e: + err.append(file.filename + ": " + str(e)) + if err: return get_json_result( - data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) - - try: - e, kb = KnowledgebaseService.get_by_id(kb_id) - if not e: - return get_data_error_result( - retmsg="Can't find this knowledgebase!") - MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0)) - if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: - return get_data_error_result( - retmsg="Exceed the maximum file number of a free user!") - - filename = duplicate_name( - DocumentService.query, - name=file.filename, - kb_id=kb.id) - filetype = filename_type(filename) - if filetype == FileType.OTHER.value: - return get_data_error_result( - retmsg="This type of file has not been supported yet!") - - location = filename - while MINIO.obj_exist(kb_id, location): - location += "_" - blob = request.files['file'].read() - MINIO.put(kb_id, location, blob) - doc = { - "id": get_uuid(), - "kb_id": kb.id, - "parser_id": kb.parser_id, - "parser_config": kb.parser_config, - "created_by": current_user.id, - "type": filetype, - "name": filename, - "location": location, - "size": len(blob), - "thumbnail": thumbnail(filename, blob) - } - if doc["type"] == FileType.VISUAL: - doc["parser_id"] = ParserType.PICTURE.value - if re.search(r"\.(ppt|pptx|pages)$", filename): - doc["parser_id"] = ParserType.PRESENTATION.value - doc = DocumentService.insert(doc) - return get_json_result(data=doc.to_json()) - except Exception as e: - return server_error_response(e) + data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR) + return get_json_result(data=True) @manager.route('/create', methods=['POST']) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 9854e848f..b957a51d3 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -34,6 +34,7 @@ do task_exe $i $WS & done +while [ 1 -eq 1 ];do $PY api/ragflow_server.py - +done wait; \ No newline at end of file