@manager.route('/document/upload', methods=['POST'])
@validate_request("kb_name")
def upload():
    """Upload a single file into a knowledge base, authenticated by API token.

    The request is expected to provide:
      * an ``Authorization`` header whose second whitespace-separated field
        is the API token,
      * a ``kb_name`` form field naming the target knowledge base,
      * a ``file`` multipart part holding the document.

    On success the blob is stored through ``MINIO.put`` under the knowledge
    base id, a document record is inserted with ``DocumentService.insert``
    and the record is returned as ``get_json_result(data=doc.to_json())``.

    Error responses:
      * AUTHENTICATION_ERROR - header missing/malformed or token unknown.
      * ARGUMENT_ERROR - no ``file`` part, or an empty filename.
      * data error - unknown knowledge base, file quota exceeded, or an
        unsupported file type.
      * server error - any exception raised while looking up the knowledge
        base or while storing the document.
    """
    # A missing or single-field header used to raise AttributeError /
    # IndexError here; treat both as an invalid token instead.
    auth_fields = (request.headers.get('Authorization') or '').split()
    objs = APIToken.query(token=auth_fields[1]) if len(auth_fields) > 1 else None
    if not objs:
        # NOTE(review): the message carries a stray trailing '"'; kept
        # byte-for-byte in case clients or sibling endpoints rely on it.
        return get_json_result(
            data=False, retmsg='Token is not valid!"', retcode=RetCode.AUTHENTICATION_ERROR)

    # Fall back to an empty name so an absent form field ends in the
    # "can't find" branch below rather than an AttributeError.
    kb_name = (request.form.get("kb_name") or "").strip()
    tenant_id = objs[0].tenant_id

    try:
        found, kb = KnowledgebaseService.get_by_name(kb_name, tenant_id)
        if not found:
            return get_data_error_result(
                retmsg="Can't find this knowledgebase!")
        kb_id = kb.id
    except Exception as e:
        return server_error_response(e)

    if 'file' not in request.files:
        return get_json_result(
            data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

    file = request.files['file']
    if file.filename == '':
        return get_json_result(
            data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)

    try:
        max_files = int(os.environ.get('MAX_FILE_NUM_PER_USER', 8192))
        if DocumentService.get_doc_count(kb.tenant_id) >= max_files:
            return get_data_error_result(
                retmsg="Exceed the maximum file number of a free user!")

        # duplicate_name yields the name to store the document under, given
        # the existing documents of this knowledge base.
        filename = duplicate_name(
            DocumentService.query,
            name=file.filename,
            kb_id=kb_id)
        filetype = filename_type(filename)
        if not filetype:
            return get_data_error_result(
                retmsg="This type of file has not been supported yet!")

        # Pick a storage key that does not collide with an existing object.
        location = filename
        while MINIO.obj_exist(kb_id, location):
            location += "_"
        blob = file.read()
        MINIO.put(kb_id, location, blob)

        doc = {
            "id": get_uuid(),
            "kb_id": kb_id,
            "parser_id": kb.parser_id,
            "parser_config": kb.parser_config,
            "created_by": kb.tenant_id,
            "type": filetype,
            "name": filename,
            "location": location,
            "size": len(blob),
            "thumbnail": thumbnail(filename, blob)
        }
        # Override the knowledge base's default parser for pictures and
        # presentations.
        if doc["type"] == FileType.VISUAL:
            doc["parser_id"] = ParserType.PICTURE.value
        # Case-insensitive so that e.g. "SLIDES.PPTX" is also recognised.
        if re.search(r"\.(ppt|pptx|pages)$", filename, re.IGNORECASE):
            doc["parser_id"] = ParserType.PRESENTATION.value
        doc = DocumentService.insert(doc)
        return get_json_result(data=doc.to_json())
    except Exception as e:
        return server_error_response(e)
@classmethod
@DB.connection_context()
def get_by_name(cls, kb_name, tenant_id):
    """Look up a valid knowledge base by name within one tenant.

    Args:
        kb_name: Name the knowledge base record must match exactly.
        tenant_id: Tenant the knowledge base record must belong to.

    Returns:
        ``(True, kb)`` with the first matching record, or ``(False, None)``
        when no record with ``StatusEnum.VALID`` status matches.
    """
    model = cls.model
    criteria = (
        (model.name == kb_name)
        & (model.tenant_id == tenant_id)
        & (model.status == StatusEnum.VALID.value)
    )
    matches = model.select().where(criteria)
    if not matches:
        return False, None
    return True, matches[0]
+### Path: /api/document/upload +### Method: POST + +### Parameter: + +| name | type | optional | description | +|---------|--------|----------|----------------------------------------| +| file | file | No | The file to upload. | +| kb_name | string | No | Name of the knowledge base to upload the file to. | + +### Response +```json +{ + "data": { + "chunk_num": 0, + "create_date": "Thu, 25 Apr 2024 14:30:06 GMT", + "create_time": 1714026606921, + "created_by": "553ec818fd5711ee8ea63043d7ed348e", + "id": "41e9324602cd11ef9f5f3043d7ed348e", + "kb_id": "06802686c0a311ee85d6246e9694c130", + "location": "readme.txt", + "name": "readme.txt", + "parser_config": { + "field_map": { + }, + "pages": [ + [ + 0, + 1000000 + ] + ] + }, + "parser_id": "general", + "process_begin_at": null, + "process_duation": 0.0, + "progress": 0.0, + "progress_msg": "", + "run": "0", + "size": 929, + "source_type": "local", + "status": "1", + "thumbnail": null, + "token_num": 0, + "type": "doc", + "update_date": "Thu, 25 Apr 2024 14:30:06 GMT", + "update_time": 1714026606921 + }, + "retcode": 0, + "retmsg": "success" +} + +``` \ No newline at end of file