mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-07-27 00:42:02 +08:00
Fix: fix document concurrent upload issue (#6095)
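### What problem does this PR solve?

Resolves the concurrent document upload issue (#6039). `DocumentService.insert` previously read the knowledge base's `doc_num` and wrote back `doc_num + 1` in a separate step, so simultaneous uploads could overwrite each other's increments. It now calls `KnowledgebaseService.atomic_increase_doc_num_by_id`, which performs the increment inside a single `UPDATE` statement.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)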
This commit is contained in:
parent
9d94acbedb
commit
d7774cf049
@ -13,33 +13,30 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
import logging
|
|
||||||
import xxhash
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import trio
|
|
||||||
|
|
||||||
|
import trio
|
||||||
|
import xxhash
|
||||||
from peewee import fn
|
from peewee import fn
|
||||||
|
|
||||||
from api.db.db_utils import bulk_insert_into_db
|
|
||||||
from api import settings
|
from api import settings
|
||||||
from api.utils import current_timestamp, get_format_time, get_uuid
|
from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus
|
||||||
from rag.settings import SVR_QUEUE_NAME
|
from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant
|
||||||
from rag.utils.storage_factory import STORAGE_IMPL
|
from api.db.db_utils import bulk_insert_into_db
|
||||||
from rag.nlp import search, rag_tokenizer
|
|
||||||
|
|
||||||
from api.db import FileType, TaskStatus, ParserType, LLMType
|
|
||||||
from api.db.db_models import DB, Knowledgebase, Tenant, Task, UserTenant
|
|
||||||
from api.db.db_models import Document
|
|
||||||
from api.db.services.common_service import CommonService
|
from api.db.services.common_service import CommonService
|
||||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||||
from api.db import StatusEnum
|
from api.utils import current_timestamp, get_format_time, get_uuid
|
||||||
|
from rag.nlp import rag_tokenizer, search
|
||||||
|
from rag.settings import SVR_QUEUE_NAME
|
||||||
from rag.utils.redis_conn import REDIS_CONN
|
from rag.utils.redis_conn import REDIS_CONN
|
||||||
|
from rag.utils.storage_factory import STORAGE_IMPL
|
||||||
|
|
||||||
|
|
||||||
class DocumentService(CommonService):
|
class DocumentService(CommonService):
|
||||||
@ -96,9 +93,7 @@ class DocumentService(CommonService):
|
|||||||
def insert(cls, doc):
|
def insert(cls, doc):
|
||||||
if not cls.save(**doc):
|
if not cls.save(**doc):
|
||||||
raise RuntimeError("Database error (Document)!")
|
raise RuntimeError("Database error (Document)!")
|
||||||
e, kb = KnowledgebaseService.get_by_id(doc["kb_id"])
|
if not KnowledgebaseService.atomic_increase_doc_num_by_id(doc["kb_id"]):
|
||||||
if not KnowledgebaseService.update_by_id(
|
|
||||||
kb.id, {"doc_num": kb.doc_num + 1}):
|
|
||||||
raise RuntimeError("Database error (Knowledgebase)!")
|
raise RuntimeError("Database error (Knowledgebase)!")
|
||||||
return Document(**doc)
|
return Document(**doc)
|
||||||
|
|
||||||
@ -374,6 +369,7 @@ class DocumentService(CommonService):
|
|||||||
"progress_msg": "Task is queued...",
|
"progress_msg": "Task is queued...",
|
||||||
"process_begin_at": get_format_time()
|
"process_begin_at": get_format_time()
|
||||||
})
|
})
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def update_meta_fields(cls, doc_id, meta_fields):
|
def update_meta_fields(cls, doc_id, meta_fields):
|
||||||
@ -480,13 +476,13 @@ def queue_raptor_o_graphrag_tasks(doc, ty):
|
|||||||
|
|
||||||
|
|
||||||
def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
||||||
from rag.app import presentation, picture, naive, audio, email
|
from api.db.services.api_service import API4ConversationService
|
||||||
|
from api.db.services.conversation_service import ConversationService
|
||||||
from api.db.services.dialog_service import DialogService
|
from api.db.services.dialog_service import DialogService
|
||||||
from api.db.services.file_service import FileService
|
from api.db.services.file_service import FileService
|
||||||
from api.db.services.llm_service import LLMBundle
|
from api.db.services.llm_service import LLMBundle
|
||||||
from api.db.services.user_service import TenantService
|
from api.db.services.user_service import TenantService
|
||||||
from api.db.services.api_service import API4ConversationService
|
from rag.app import audio, email, naive, picture, presentation
|
||||||
from api.db.services.conversation_service import ConversationService
|
|
||||||
|
|
||||||
e, conv = ConversationService.get_by_id(conversation_id)
|
e, conv = ConversationService.get_by_id(conversation_id)
|
||||||
if not e:
|
if not e:
|
||||||
|
@ -13,11 +13,15 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
from api.db import StatusEnum, TenantPermission
|
from datetime import datetime
|
||||||
from api.db.db_models import Knowledgebase, DB, Tenant, User, UserTenant,Document
|
|
||||||
from api.db.services.common_service import CommonService
|
|
||||||
from peewee import fn
|
from peewee import fn
|
||||||
|
|
||||||
|
from api.db import StatusEnum, TenantPermission
|
||||||
|
from api.db.db_models import DB, Document, Knowledgebase, Tenant, User, UserTenant
|
||||||
|
from api.db.services.common_service import CommonService
|
||||||
|
from api.utils import current_timestamp, datetime_format
|
||||||
|
|
||||||
|
|
||||||
class KnowledgebaseService(CommonService):
|
class KnowledgebaseService(CommonService):
|
||||||
"""Service class for managing knowledge base operations.
|
"""Service class for managing knowledge base operations.
|
||||||
@ -108,16 +112,16 @@ class KnowledgebaseService(CommonService):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def list_documents_by_ids(cls,kb_ids):
|
def list_documents_by_ids(cls, kb_ids):
|
||||||
# Get document IDs associated with given knowledge base IDs
|
# Get document IDs associated with given knowledge base IDs
|
||||||
# Args:
|
# Args:
|
||||||
# kb_ids: List of knowledge base IDs
|
# kb_ids: List of knowledge base IDs
|
||||||
# Returns:
|
# Returns:
|
||||||
# List of document IDs
|
# List of document IDs
|
||||||
doc_ids=cls.model.select(Document.id.alias("document_id")).join(Document,on=(cls.model.id == Document.kb_id)).where(
|
doc_ids = cls.model.select(Document.id.alias("document_id")).join(Document, on=(cls.model.id == Document.kb_id)).where(
|
||||||
cls.model.id.in_(kb_ids)
|
cls.model.id.in_(kb_ids)
|
||||||
)
|
)
|
||||||
doc_ids =list(doc_ids.dicts())
|
doc_ids = list(doc_ids.dicts())
|
||||||
doc_ids = [doc["document_id"] for doc in doc_ids]
|
doc_ids = [doc["document_id"] for doc in doc_ids]
|
||||||
return doc_ids
|
return doc_ids
|
||||||
|
|
||||||
@ -381,3 +385,12 @@ class KnowledgebaseService(CommonService):
|
|||||||
kbs = kbs.dicts()
|
kbs = kbs.dicts()
|
||||||
return list(kbs)
|
return list(kbs)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
@DB.connection_context()
|
||||||
|
def atomic_increase_doc_num_by_id(cls, kb_id):
|
||||||
|
data = {}
|
||||||
|
data["update_time"] = current_timestamp()
|
||||||
|
data["update_date"] = datetime_format(datetime.now())
|
||||||
|
data["doc_num"] = cls.model.doc_num + 1
|
||||||
|
num = cls.model.update(data).where(cls.model.id == kb_id).execute()
|
||||||
|
return num
|
||||||
|
Loading…
x
Reference in New Issue
Block a user