[BREAKING CHANGE] GET to POST: enhance document list capability (#7349)

### What problem does this PR solve?

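Enhance the capability of `list_docs`: the document list can now be filtered by run status (`run_status`) and by file type (`types`), both supplied in the JSON request body and validated against the known task statuses and file types. Pagination is applied only when both `page` and `page_size` are non-zero; both now default to 0.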

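Breaking change: the document `/list` route now accepts `POST` instead of `GET`, so existing callers must switch methods and send a JSON body (an empty object is enough). `kb_id` and the other paging/sorting parameters are still read from the query string.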

### Type of change

- [x] Refactoring
- [x] Enhancement with breaking change
This commit is contained in:
Yongteng Lei 2025-04-27 16:48:27 +08:00 committed by GitHub
parent 5043143bc5
commit a4be6c50cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 62 additions and 14 deletions

View File

@ -21,7 +21,7 @@ from flask import request, Response
from api.db.services.llm_service import TenantLLMService from api.db.services.llm_service import TenantLLMService
from flask_login import login_required, current_user from flask_login import login_required, current_user
from api.db import FileType, LLMType, ParserType, FileSource from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, LLMType, ParserType, FileSource
from api.db.db_models import APIToken, Task, File from api.db.db_models import APIToken, Task, File
from api.db.services import duplicate_name from api.db.services import duplicate_name
from api.db.services.api_service import APITokenService, API4ConversationService from api.db.services.api_service import APITokenService, API4ConversationService
@ -577,10 +577,23 @@ def list_kb_docs():
orderby = req.get("orderby", "create_time") orderby = req.get("orderby", "create_time")
desc = req.get("desc", True) desc = req.get("desc", True)
keywords = req.get("keywords", "") keywords = req.get("keywords", "")
status = req.get("status", [])
if status:
invalid_status = {s for s in status if s not in VALID_TASK_STATUS}
if invalid_status:
return get_data_error_result(
message=f"Invalid filter status conditions: {', '.join(invalid_status)}"
)
types = req.get("types", [])
if types:
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
if invalid_types:
return get_data_error_result(
message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}"
)
try: try:
docs, tol = DocumentService.get_by_kb_id( docs, tol = DocumentService.get_by_kb_id(
kb_id, page_number, items_per_page, orderby, desc, keywords) kb_id, page_number, items_per_page, orderby, desc, keywords, status, types)
docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs] docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs]
return get_json_result(data={"total": tol, "docs": docs}) return get_json_result(data={"total": tol, "docs": docs})

View File

@ -25,7 +25,7 @@ from flask_login import login_required, current_user
from deepdoc.parser.html_parser import RAGFlowHtmlParser from deepdoc.parser.html_parser import RAGFlowHtmlParser
from rag.nlp import search from rag.nlp import search
from api.db import FileType, TaskStatus, ParserType, FileSource from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, TaskStatus, ParserType, FileSource
from api.db.db_models import File, Task from api.db.db_models import File, Task
from api.db.services.file2document_service import File2DocumentService from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService from api.db.services.file_service import FileService
@ -183,7 +183,7 @@ def create():
return server_error_response(e) return server_error_response(e)
@manager.route('/list', methods=['GET']) # noqa: F821 @manager.route('/list', methods=['POST']) # noqa: F821
@login_required @login_required
def list_docs(): def list_docs():
kb_id = request.args.get("kb_id") kb_id = request.args.get("kb_id")
@ -201,13 +201,32 @@ def list_docs():
code=settings.RetCode.OPERATING_ERROR) code=settings.RetCode.OPERATING_ERROR)
keywords = request.args.get("keywords", "") keywords = request.args.get("keywords", "")
page_number = int(request.args.get("page", 1)) page_number = int(request.args.get("page", 0))
items_per_page = int(request.args.get("page_size", 15)) items_per_page = int(request.args.get("page_size", 0))
orderby = request.args.get("orderby", "create_time") orderby = request.args.get("orderby", "create_time")
desc = request.args.get("desc", True) desc = request.args.get("desc", True)
req = request.get_json()
run_status = req.get("run_status", [])
if run_status:
invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS}
if invalid_status:
return get_data_error_result(
message=f"Invalid filter run status conditions: {', '.join(invalid_status)}"
)
types = req.get("types", [])
if types:
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
if invalid_types:
return get_data_error_result(
message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}"
)
try: try:
docs, tol = DocumentService.get_by_kb_id( docs, tol = DocumentService.get_by_kb_id(
kb_id, page_number, items_per_page, orderby, desc, keywords) kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types)
for doc_item in docs: for doc_item in docs:
if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX): if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX):
@ -331,7 +350,9 @@ def rm():
message="Database error (Document removal)!") message="Database error (Document removal)!")
f2d = File2DocumentService.get_by_document_id(doc_id) f2d = File2DocumentService.get_by_document_id(doc_id)
deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id]) deleted_file_count = 0
if f2d:
deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
File2DocumentService.delete_by_document_id(doc_id) File2DocumentService.delete_by_document_id(doc_id)
if deleted_file_count > 0: if deleted_file_count > 0:
STORAGE_IMPL.rm(b, n) STORAGE_IMPL.rm(b, n)

View File

@ -49,6 +49,7 @@ class FileType(StrEnum):
FOLDER = 'folder' FOLDER = 'folder'
OTHER = "other" OTHER = "other"
VALID_FILE_TYPES = {FileType.PDF, FileType.DOC, FileType.VISUAL, FileType.AURAL, FileType.VIRTUAL, FileType.FOLDER, FileType.OTHER}
class LLMType(StrEnum): class LLMType(StrEnum):
CHAT = 'chat' CHAT = 'chat'
@ -73,6 +74,7 @@ class TaskStatus(StrEnum):
DONE = "3" DONE = "3"
FAIL = "4" FAIL = "4"
VALID_TASK_STATUS = {TaskStatus.UNSTART, TaskStatus.RUNNING, TaskStatus.CANCEL, TaskStatus.DONE, TaskStatus.FAIL}
class ParserType(StrEnum): class ParserType(StrEnum):
PRESENTATION = "presentation" PRESENTATION = "presentation"

View File

@ -70,7 +70,7 @@ class DocumentService(CommonService):
@classmethod @classmethod
@DB.connection_context() @DB.connection_context()
def get_by_kb_id(cls, kb_id, page_number, items_per_page, def get_by_kb_id(cls, kb_id, page_number, items_per_page,
orderby, desc, keywords): orderby, desc, keywords, run_status, types):
if keywords: if keywords:
docs = cls.model.select().where( docs = cls.model.select().where(
(cls.model.kb_id == kb_id), (cls.model.kb_id == kb_id),
@ -78,13 +78,21 @@ class DocumentService(CommonService):
) )
else: else:
docs = cls.model.select().where(cls.model.kb_id == kb_id) docs = cls.model.select().where(cls.model.kb_id == kb_id)
if run_status:
docs = docs.where(cls.model.run.in_(run_status))
if types:
docs = docs.where(cls.model.type.in_(types))
count = docs.count() count = docs.count()
if desc: if desc:
docs = docs.order_by(cls.model.getter_by(orderby).desc()) docs = docs.order_by(cls.model.getter_by(orderby).desc())
else: else:
docs = docs.order_by(cls.model.getter_by(orderby).asc()) docs = docs.order_by(cls.model.getter_by(orderby).asc())
docs = docs.paginate(page_number, items_per_page)
if page_number and items_per_page:
docs = docs.paginate(page_number, items_per_page)
return list(docs.dicts()), count return list(docs.dicts()), count

View File

@ -97,7 +97,7 @@ class KnowledgebaseService(CommonService):
kb = kbs[0] kb = kbs[0]
# Get all documents in the knowledge base # Get all documents in the knowledge base
docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "") docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "", [], [])
# Check parsing status of each document # Check parsing status of each document
for doc in docs: for doc in docs:
@ -226,7 +226,10 @@ class KnowledgebaseService(CommonService):
cls.model.chunk_num, cls.model.chunk_num,
cls.model.parser_id, cls.model.parser_id,
cls.model.parser_config, cls.model.parser_config,
cls.model.pagerank] cls.model.pagerank,
cls.model.create_time,
cls.model.update_time
]
kbs = cls.model.select(*fields).join(Tenant, on=( kbs = cls.model.select(*fields).join(Tenant, on=(
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where( (Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
(cls.model.id == kb_id), (cls.model.id == kb_id),

View File

@ -70,7 +70,8 @@ def upload_file(auth, dataset_id, path):
def list_document(auth, dataset_id): def list_document(auth, dataset_id):
authorization = {"Authorization": auth} authorization = {"Authorization": auth}
url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}" url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}"
res = requests.get(url=url, headers=authorization) json = {}
res = requests.post(url=url, headers=authorization, json=json)
return res.json() return res.json()