mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 18:19:13 +08:00
fix uploading docx for mind map (#2064)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
884fd83dc7
commit
89b05ad79f
@ -17,6 +17,8 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
|
import traceback
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@ -33,7 +35,7 @@ from graphrag.mind_map_extractor import MindMapExtractor
|
|||||||
from rag.settings import SVR_QUEUE_NAME
|
from rag.settings import SVR_QUEUE_NAME
|
||||||
from rag.utils.es_conn import ELASTICSEARCH
|
from rag.utils.es_conn import ELASTICSEARCH
|
||||||
from rag.utils.minio_conn import MINIO
|
from rag.utils.minio_conn import MINIO
|
||||||
from rag.nlp import search
|
from rag.nlp import search, rag_tokenizer
|
||||||
|
|
||||||
from api.db import FileType, TaskStatus, ParserType, LLMType
|
from api.db import FileType, TaskStatus, ParserType, LLMType
|
||||||
from api.db.db_models import DB, Knowledgebase, Tenant, Task
|
from api.db.db_models import DB, Knowledgebase, Tenant, Task
|
||||||
@ -432,6 +434,9 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|||||||
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
|
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
|
||||||
exe = ThreadPoolExecutor(max_workers=12)
|
exe = ThreadPoolExecutor(max_workers=12)
|
||||||
threads = []
|
threads = []
|
||||||
|
doc_nm = {}
|
||||||
|
for d, blob in files:
|
||||||
|
doc_nm[d["id"]] = d["name"]
|
||||||
for d, blob in files:
|
for d, blob in files:
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"callback": dummy,
|
"callback": dummy,
|
||||||
@ -504,6 +509,9 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|||||||
"id": get_uuid(),
|
"id": get_uuid(),
|
||||||
"doc_id": doc_id,
|
"doc_id": doc_id,
|
||||||
"kb_id": [kb.id],
|
"kb_id": [kb.id],
|
||||||
|
"docnm_kwd": doc_nm[doc_id],
|
||||||
|
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", doc_nm[doc_id])),
|
||||||
|
"content_ltks": "",
|
||||||
"content_with_weight": mind_map,
|
"content_with_weight": mind_map,
|
||||||
"knowledge_graph_kwd": "mind_map"
|
"knowledge_graph_kwd": "mind_map"
|
||||||
})
|
})
|
||||||
|
Loading…
x
Reference in New Issue
Block a user