refine document upload (#602)

### What problem does this PR solve?

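Addresses #567. The document upload endpoint now processes every file submitted under the `file` form field, collects per-file errors and returns them together, instead of reading a single file and returning on the first error. The launcher script also runs `api/ragflow_server.py` inside a loop so the server is started again whenever it exits.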

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
KevinHuSh 2024-04-29 15:45:08 +08:00 committed by GitHub
parent 8acc01a227
commit 6874c6f3a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 54 additions and 49 deletions

View File

@ -51,20 +51,22 @@ def upload():
if 'file' not in request.files: if 'file' not in request.files:
return get_json_result( return get_json_result(
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR) data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
file = request.files['file']
if file.filename == '': file_objs = request.files.getlist('file')
for file_obj in file_objs:
if file_obj.filename == '':
return get_json_result( return get_json_result(
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR) data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
err = []
for file in file_objs:
try: try:
e, kb = KnowledgebaseService.get_by_id(kb_id) e, kb = KnowledgebaseService.get_by_id(kb_id)
if not e: if not e:
return get_data_error_result( raise LookupError("Can't find this knowledgebase!")
retmsg="Can't find this knowledgebase!")
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0)) MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER: if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
return get_data_error_result( raise RuntimeError("Exceed the maximum file number of a free user!")
retmsg="Exceed the maximum file number of a free user!")
filename = duplicate_name( filename = duplicate_name(
DocumentService.query, DocumentService.query,
@ -72,13 +74,12 @@ def upload():
kb_id=kb.id) kb_id=kb.id)
filetype = filename_type(filename) filetype = filename_type(filename)
if filetype == FileType.OTHER.value: if filetype == FileType.OTHER.value:
return get_data_error_result( raise RuntimeError("This type of file has not been supported yet!")
retmsg="This type of file has not been supported yet!")
location = filename location = filename
while MINIO.obj_exist(kb_id, location): while MINIO.obj_exist(kb_id, location):
location += "_" location += "_"
blob = request.files['file'].read() blob = file.read()
MINIO.put(kb_id, location, blob) MINIO.put(kb_id, location, blob)
doc = { doc = {
"id": get_uuid(), "id": get_uuid(),
@ -96,10 +97,13 @@ def upload():
doc["parser_id"] = ParserType.PICTURE.value doc["parser_id"] = ParserType.PICTURE.value
if re.search(r"\.(ppt|pptx|pages)$", filename): if re.search(r"\.(ppt|pptx|pages)$", filename):
doc["parser_id"] = ParserType.PRESENTATION.value doc["parser_id"] = ParserType.PRESENTATION.value
doc = DocumentService.insert(doc) DocumentService.insert(doc)
return get_json_result(data=doc.to_json())
except Exception as e: except Exception as e:
return server_error_response(e) err.append(file.filename + ": " + str(e))
if err:
return get_json_result(
data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
return get_json_result(data=True)
@manager.route('/create', methods=['POST']) @manager.route('/create', methods=['POST'])

View File

@ -34,6 +34,7 @@ do
task_exe $i $WS & task_exe $i $WS &
done done
while [ 1 -eq 1 ];do
$PY api/ragflow_server.py $PY api/ragflow_server.py
done
wait; wait;