mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 20:29:06 +08:00
Fix uploading DOCX files for mind map (#2064)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
884fd83dc7
commit
89b05ad79f
@@ -17,6 +17,8 @@ import hashlib
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import traceback
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
@@ -33,7 +35,7 @@ from graphrag.mind_map_extractor import MindMapExtractor
|
||||
from rag.settings import SVR_QUEUE_NAME
|
||||
from rag.utils.es_conn import ELASTICSEARCH
|
||||
from rag.utils.minio_conn import MINIO
|
||||
from rag.nlp import search
|
||||
from rag.nlp import search, rag_tokenizer
|
||||
|
||||
from api.db import FileType, TaskStatus, ParserType, LLMType
|
||||
from api.db.db_models import DB, Knowledgebase, Tenant, Task
|
||||
@@ -432,6 +434,9 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
||||
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
|
||||
exe = ThreadPoolExecutor(max_workers=12)
|
||||
threads = []
|
||||
doc_nm = {}
|
||||
for d, blob in files:
|
||||
doc_nm[d["id"]] = d["name"]
|
||||
for d, blob in files:
|
||||
kwargs = {
|
||||
"callback": dummy,
|
||||
@@ -504,6 +509,9 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
||||
"id": get_uuid(),
|
||||
"doc_id": doc_id,
|
||||
"kb_id": [kb.id],
|
||||
"docnm_kwd": doc_nm[doc_id],
|
||||
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", doc_nm[doc_id])),
|
||||
"content_ltks": "",
|
||||
"content_with_weight": mind_map,
|
||||
"knowledge_graph_kwd": "mind_map"
|
||||
})
|
||||
|
Loading…
x
Reference in New Issue
Block a user