mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-14 04:26:05 +08:00
[BREAKING CHANGE] GET to POST: enhance document list capability (#7349)
### What problem does this PR solve?

Enhances the capability of `list_docs`: the document list can now be filtered by run status and file type.

Breaking change: the HTTP method of the endpoint changes from `GET` to `POST`.

### Type of change

- [x] Refactoring
- [x] Enhancement with breaking change
This commit is contained in:
parent
5043143bc5
commit
a4be6c50cf
@ -21,7 +21,7 @@ from flask import request, Response
|
|||||||
from api.db.services.llm_service import TenantLLMService
|
from api.db.services.llm_service import TenantLLMService
|
||||||
from flask_login import login_required, current_user
|
from flask_login import login_required, current_user
|
||||||
|
|
||||||
from api.db import FileType, LLMType, ParserType, FileSource
|
from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, LLMType, ParserType, FileSource
|
||||||
from api.db.db_models import APIToken, Task, File
|
from api.db.db_models import APIToken, Task, File
|
||||||
from api.db.services import duplicate_name
|
from api.db.services import duplicate_name
|
||||||
from api.db.services.api_service import APITokenService, API4ConversationService
|
from api.db.services.api_service import APITokenService, API4ConversationService
|
||||||
@ -577,10 +577,23 @@ def list_kb_docs():
|
|||||||
orderby = req.get("orderby", "create_time")
|
orderby = req.get("orderby", "create_time")
|
||||||
desc = req.get("desc", True)
|
desc = req.get("desc", True)
|
||||||
keywords = req.get("keywords", "")
|
keywords = req.get("keywords", "")
|
||||||
|
status = req.get("status", [])
|
||||||
|
if status:
|
||||||
|
invalid_status = {s for s in status if s not in VALID_TASK_STATUS}
|
||||||
|
if invalid_status:
|
||||||
|
return get_data_error_result(
|
||||||
|
message=f"Invalid filter status conditions: {', '.join(invalid_status)}"
|
||||||
|
)
|
||||||
|
types = req.get("types", [])
|
||||||
|
if types:
|
||||||
|
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
|
||||||
|
if invalid_types:
|
||||||
|
return get_data_error_result(
|
||||||
|
message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
docs, tol = DocumentService.get_by_kb_id(
|
docs, tol = DocumentService.get_by_kb_id(
|
||||||
kb_id, page_number, items_per_page, orderby, desc, keywords)
|
kb_id, page_number, items_per_page, orderby, desc, keywords, status, types)
|
||||||
docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs]
|
docs = [{"doc_id": doc['id'], "doc_name": doc['name']} for doc in docs]
|
||||||
|
|
||||||
return get_json_result(data={"total": tol, "docs": docs})
|
return get_json_result(data={"total": tol, "docs": docs})
|
||||||
|
@ -25,7 +25,7 @@ from flask_login import login_required, current_user
|
|||||||
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
||||||
from rag.nlp import search
|
from rag.nlp import search
|
||||||
|
|
||||||
from api.db import FileType, TaskStatus, ParserType, FileSource
|
from api.db import VALID_FILE_TYPES, VALID_TASK_STATUS, FileType, TaskStatus, ParserType, FileSource
|
||||||
from api.db.db_models import File, Task
|
from api.db.db_models import File, Task
|
||||||
from api.db.services.file2document_service import File2DocumentService
|
from api.db.services.file2document_service import File2DocumentService
|
||||||
from api.db.services.file_service import FileService
|
from api.db.services.file_service import FileService
|
||||||
@ -183,7 +183,7 @@ def create():
|
|||||||
return server_error_response(e)
|
return server_error_response(e)
|
||||||
|
|
||||||
|
|
||||||
@manager.route('/list', methods=['GET']) # noqa: F821
|
@manager.route('/list', methods=['POST']) # noqa: F821
|
||||||
@login_required
|
@login_required
|
||||||
def list_docs():
|
def list_docs():
|
||||||
kb_id = request.args.get("kb_id")
|
kb_id = request.args.get("kb_id")
|
||||||
@ -201,13 +201,32 @@ def list_docs():
|
|||||||
code=settings.RetCode.OPERATING_ERROR)
|
code=settings.RetCode.OPERATING_ERROR)
|
||||||
keywords = request.args.get("keywords", "")
|
keywords = request.args.get("keywords", "")
|
||||||
|
|
||||||
page_number = int(request.args.get("page", 1))
|
page_number = int(request.args.get("page", 0))
|
||||||
items_per_page = int(request.args.get("page_size", 15))
|
items_per_page = int(request.args.get("page_size", 0))
|
||||||
orderby = request.args.get("orderby", "create_time")
|
orderby = request.args.get("orderby", "create_time")
|
||||||
desc = request.args.get("desc", True)
|
desc = request.args.get("desc", True)
|
||||||
|
|
||||||
|
req = request.get_json()
|
||||||
|
|
||||||
|
run_status = req.get("run_status", [])
|
||||||
|
if run_status:
|
||||||
|
invalid_status = {s for s in run_status if s not in VALID_TASK_STATUS}
|
||||||
|
if invalid_status:
|
||||||
|
return get_data_error_result(
|
||||||
|
message=f"Invalid filter run status conditions: {', '.join(invalid_status)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
types = req.get("types", [])
|
||||||
|
if types:
|
||||||
|
invalid_types = {t for t in types if t not in VALID_FILE_TYPES}
|
||||||
|
if invalid_types:
|
||||||
|
return get_data_error_result(
|
||||||
|
message=f"Invalid filter conditions: {', '.join(invalid_types)} type{'s' if len(invalid_types) > 1 else ''}"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
docs, tol = DocumentService.get_by_kb_id(
|
docs, tol = DocumentService.get_by_kb_id(
|
||||||
kb_id, page_number, items_per_page, orderby, desc, keywords)
|
kb_id, page_number, items_per_page, orderby, desc, keywords, run_status, types)
|
||||||
|
|
||||||
for doc_item in docs:
|
for doc_item in docs:
|
||||||
if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX):
|
if doc_item['thumbnail'] and not doc_item['thumbnail'].startswith(IMG_BASE64_PREFIX):
|
||||||
@ -331,7 +350,9 @@ def rm():
|
|||||||
message="Database error (Document removal)!")
|
message="Database error (Document removal)!")
|
||||||
|
|
||||||
f2d = File2DocumentService.get_by_document_id(doc_id)
|
f2d = File2DocumentService.get_by_document_id(doc_id)
|
||||||
deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
deleted_file_count = 0
|
||||||
|
if f2d:
|
||||||
|
deleted_file_count = FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
||||||
File2DocumentService.delete_by_document_id(doc_id)
|
File2DocumentService.delete_by_document_id(doc_id)
|
||||||
if deleted_file_count > 0:
|
if deleted_file_count > 0:
|
||||||
STORAGE_IMPL.rm(b, n)
|
STORAGE_IMPL.rm(b, n)
|
||||||
|
@ -49,6 +49,7 @@ class FileType(StrEnum):
|
|||||||
FOLDER = 'folder'
|
FOLDER = 'folder'
|
||||||
OTHER = "other"
|
OTHER = "other"
|
||||||
|
|
||||||
|
VALID_FILE_TYPES = {FileType.PDF, FileType.DOC, FileType.VISUAL, FileType.AURAL, FileType.VIRTUAL, FileType.FOLDER, FileType.OTHER}
|
||||||
|
|
||||||
class LLMType(StrEnum):
|
class LLMType(StrEnum):
|
||||||
CHAT = 'chat'
|
CHAT = 'chat'
|
||||||
@ -73,6 +74,7 @@ class TaskStatus(StrEnum):
|
|||||||
DONE = "3"
|
DONE = "3"
|
||||||
FAIL = "4"
|
FAIL = "4"
|
||||||
|
|
||||||
|
VALID_TASK_STATUS = {TaskStatus.UNSTART, TaskStatus.RUNNING, TaskStatus.CANCEL, TaskStatus.DONE, TaskStatus.FAIL}
|
||||||
|
|
||||||
class ParserType(StrEnum):
|
class ParserType(StrEnum):
|
||||||
PRESENTATION = "presentation"
|
PRESENTATION = "presentation"
|
||||||
|
@ -70,7 +70,7 @@ class DocumentService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
|
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
|
||||||
orderby, desc, keywords):
|
orderby, desc, keywords, run_status, types):
|
||||||
if keywords:
|
if keywords:
|
||||||
docs = cls.model.select().where(
|
docs = cls.model.select().where(
|
||||||
(cls.model.kb_id == kb_id),
|
(cls.model.kb_id == kb_id),
|
||||||
@ -78,13 +78,21 @@ class DocumentService(CommonService):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
docs = cls.model.select().where(cls.model.kb_id == kb_id)
|
docs = cls.model.select().where(cls.model.kb_id == kb_id)
|
||||||
|
|
||||||
|
if run_status:
|
||||||
|
docs = docs.where(cls.model.run.in_(run_status))
|
||||||
|
if types:
|
||||||
|
docs = docs.where(cls.model.type.in_(types))
|
||||||
|
|
||||||
count = docs.count()
|
count = docs.count()
|
||||||
if desc:
|
if desc:
|
||||||
docs = docs.order_by(cls.model.getter_by(orderby).desc())
|
docs = docs.order_by(cls.model.getter_by(orderby).desc())
|
||||||
else:
|
else:
|
||||||
docs = docs.order_by(cls.model.getter_by(orderby).asc())
|
docs = docs.order_by(cls.model.getter_by(orderby).asc())
|
||||||
|
|
||||||
docs = docs.paginate(page_number, items_per_page)
|
|
||||||
|
if page_number and items_per_page:
|
||||||
|
docs = docs.paginate(page_number, items_per_page)
|
||||||
|
|
||||||
return list(docs.dicts()), count
|
return list(docs.dicts()), count
|
||||||
|
|
||||||
|
@ -97,7 +97,7 @@ class KnowledgebaseService(CommonService):
|
|||||||
kb = kbs[0]
|
kb = kbs[0]
|
||||||
|
|
||||||
# Get all documents in the knowledge base
|
# Get all documents in the knowledge base
|
||||||
docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "")
|
docs, _ = DocumentService.get_by_kb_id(kb_id, 1, 1000, "create_time", True, "", [], [])
|
||||||
|
|
||||||
# Check parsing status of each document
|
# Check parsing status of each document
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
@ -226,7 +226,10 @@ class KnowledgebaseService(CommonService):
|
|||||||
cls.model.chunk_num,
|
cls.model.chunk_num,
|
||||||
cls.model.parser_id,
|
cls.model.parser_id,
|
||||||
cls.model.parser_config,
|
cls.model.parser_config,
|
||||||
cls.model.pagerank]
|
cls.model.pagerank,
|
||||||
|
cls.model.create_time,
|
||||||
|
cls.model.update_time
|
||||||
|
]
|
||||||
kbs = cls.model.select(*fields).join(Tenant, on=(
|
kbs = cls.model.select(*fields).join(Tenant, on=(
|
||||||
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
|
(Tenant.id == cls.model.tenant_id) & (Tenant.status == StatusEnum.VALID.value))).where(
|
||||||
(cls.model.id == kb_id),
|
(cls.model.id == kb_id),
|
||||||
|
@ -70,7 +70,8 @@ def upload_file(auth, dataset_id, path):
|
|||||||
def list_document(auth, dataset_id):
|
def list_document(auth, dataset_id):
|
||||||
authorization = {"Authorization": auth}
|
authorization = {"Authorization": auth}
|
||||||
url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}"
|
url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}"
|
||||||
res = requests.get(url=url, headers=authorization)
|
json = {}
|
||||||
|
res = requests.post(url=url, headers=authorization, json=json)
|
||||||
return res.json()
|
return res.json()
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user