mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-20 12:39:07 +08:00
Updated document upload method (#777)
### What problem does this PR solve? api_app.py /document/upload add two non mandatory parameters parser_id: [naive,qaresume,manual,table,paper,book,laws,presentation,picture,one] run: 1 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
1e923f1c90
commit
626ace8639
@ -31,11 +31,11 @@ from api.settings import RetCode
|
|||||||
from api.utils import get_uuid, current_timestamp, datetime_format
|
from api.utils import get_uuid, current_timestamp, datetime_format
|
||||||
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request
|
||||||
from itsdangerous import URLSafeTimedSerializer
|
from itsdangerous import URLSafeTimedSerializer
|
||||||
|
from api.db.services.task_service import TaskService, queue_tasks
|
||||||
from api.utils.file_utils import filename_type, thumbnail
|
from api.utils.file_utils import filename_type, thumbnail
|
||||||
from rag.utils.minio_conn import MINIO
|
from rag.utils.minio_conn import MINIO
|
||||||
|
from api.db.db_models import Task
|
||||||
|
from api.db.services.file2document_service import File2DocumentService
|
||||||
def generate_confirmation_token(tenent_id):
|
def generate_confirmation_token(tenent_id):
|
||||||
serializer = URLSafeTimedSerializer(tenent_id)
|
serializer = URLSafeTimedSerializer(tenent_id)
|
||||||
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
||||||
@ -229,6 +229,7 @@ def upload():
|
|||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
|
|
||||||
file = request.files['file']
|
file = request.files['file']
|
||||||
if file.filename == '':
|
if file.filename == '':
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
@ -252,6 +253,7 @@ def upload():
|
|||||||
location += "_"
|
location += "_"
|
||||||
blob = request.files['file'].read()
|
blob = request.files['file'].read()
|
||||||
MINIO.put(kb_id, location, blob)
|
MINIO.put(kb_id, location, blob)
|
||||||
|
|
||||||
doc = {
|
doc = {
|
||||||
"id": get_uuid(),
|
"id": get_uuid(),
|
||||||
"kb_id": kb.id,
|
"kb_id": kb.id,
|
||||||
@ -264,11 +266,42 @@ def upload():
|
|||||||
"size": len(blob),
|
"size": len(blob),
|
||||||
"thumbnail": thumbnail(filename, blob)
|
"thumbnail": thumbnail(filename, blob)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
form_data=request.form
|
||||||
|
if "parser_id" in form_data.keys():
|
||||||
|
if request.form.get("parser_id").strip() in list(vars(ParserType).values())[1:-3]:
|
||||||
|
doc["parser_id"] = request.form.get("parser_id").strip()
|
||||||
if doc["type"] == FileType.VISUAL:
|
if doc["type"] == FileType.VISUAL:
|
||||||
doc["parser_id"] = ParserType.PICTURE.value
|
doc["parser_id"] = ParserType.PICTURE.value
|
||||||
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
||||||
doc["parser_id"] = ParserType.PRESENTATION.value
|
doc["parser_id"] = ParserType.PRESENTATION.value
|
||||||
doc = DocumentService.insert(doc)
|
|
||||||
return get_json_result(data=doc.to_json())
|
doc_result = DocumentService.insert(doc)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|
||||||
|
if "run" in form_data.keys():
|
||||||
|
if request.form.get("run").strip() == "1":
|
||||||
|
try:
|
||||||
|
info = {"run": 1, "progress": 0}
|
||||||
|
info["progress_msg"] = ""
|
||||||
|
info["chunk_num"] = 0
|
||||||
|
info["token_num"] = 0
|
||||||
|
DocumentService.update_by_id(doc["id"], info)
|
||||||
|
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
||||||
|
tenant_id = DocumentService.get_tenant_id(doc["id"])
|
||||||
|
if not tenant_id:
|
||||||
|
return get_data_error_result(retmsg="Tenant not found!")
|
||||||
|
|
||||||
|
#e, doc = DocumentService.get_by_id(doc["id"])
|
||||||
|
TaskService.filter_delete([Task.doc_id == doc["id"]])
|
||||||
|
e, doc = DocumentService.get_by_id(doc["id"])
|
||||||
|
doc = doc.to_dict()
|
||||||
|
doc["tenant_id"] = tenant_id
|
||||||
|
bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"])
|
||||||
|
queue_tasks(doc, bucket, name)
|
||||||
|
except Exception as e:
|
||||||
|
return server_error_response(e)
|
||||||
|
|
||||||
|
return get_json_result(data=doc_result.to_json())
|
Loading…
x
Reference in New Issue
Block a user