mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-13 03:39:05 +08:00
fix mind map bug (#1934)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
13bcfd7ebd
commit
d73a75506e
@ -452,7 +452,7 @@ def get_image(image_id):
|
|||||||
@login_required
|
@login_required
|
||||||
@validate_request("conversation_id")
|
@validate_request("conversation_id")
|
||||||
def upload_and_parse():
|
def upload_and_parse():
|
||||||
req = request.json
|
from rag.app import presentation, picture, naive, audio, email
|
||||||
if 'file' not in request.files:
|
if 'file' not in request.files:
|
||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
|
||||||
@ -463,7 +463,7 @@ def upload_and_parse():
|
|||||||
return get_json_result(
|
return get_json_result(
|
||||||
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
||||||
|
|
||||||
e, conv = ConversationService.get_by_id(req["conversation_id"])
|
e, conv = ConversationService.get_by_id(request.form.get("conversation_id"))
|
||||||
if not e:
|
if not e:
|
||||||
return get_data_error_result(retmsg="Conversation not found!")
|
return get_data_error_result(retmsg="Conversation not found!")
|
||||||
e, dia = DialogService.get_by_id(conv.dialog_id)
|
e, dia = DialogService.get_by_id(conv.dialog_id)
|
||||||
@ -487,6 +487,12 @@ def upload_and_parse():
|
|||||||
def dummy(prog=None, msg=""):
|
def dummy(prog=None, msg=""):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
FACTORY = {
|
||||||
|
ParserType.PRESENTATION.value: presentation,
|
||||||
|
ParserType.PICTURE.value: picture,
|
||||||
|
ParserType.AUDIO.value: audio,
|
||||||
|
ParserType.EMAIL.value: email
|
||||||
|
}
|
||||||
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
|
parser_config = {"chunk_token_num": 4096, "delimiter": "\n!?;。;!?", "layout_recognize": False}
|
||||||
exe = ThreadPoolExecutor(max_workers=12)
|
exe = ThreadPoolExecutor(max_workers=12)
|
||||||
threads = []
|
threads = []
|
||||||
@ -497,7 +503,7 @@ def upload_and_parse():
|
|||||||
"from_page": 0,
|
"from_page": 0,
|
||||||
"to_page": 100000
|
"to_page": 100000
|
||||||
}
|
}
|
||||||
threads.append(exe.submit(naive.chunk, d["name"], blob, **kwargs))
|
threads.append(exe.submit(FACTORY.get(d["parser_id"], naive).chunk, d["name"], blob, **kwargs))
|
||||||
|
|
||||||
for (docinfo,_), th in zip(files, threads):
|
for (docinfo,_), th in zip(files, threads):
|
||||||
docs = []
|
docs = []
|
||||||
@ -550,7 +556,7 @@ def upload_and_parse():
|
|||||||
for doc_id in docids:
|
for doc_id in docids:
|
||||||
cks = [c for c in docs if c["doc_id"] == doc_id]
|
cks = [c for c in docs if c["doc_id"] == doc_id]
|
||||||
|
|
||||||
if parser_ids[doc_id] != ParserType.PICTURE.value:
|
if False and parser_ids[doc_id] != ParserType.PICTURE.value:
|
||||||
mindmap = MindMapExtractor(llm_bdl)
|
mindmap = MindMapExtractor(llm_bdl)
|
||||||
try:
|
try:
|
||||||
mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output, ensure_ascii=False, indent=2)
|
mind_map = json.dumps(mindmap([c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]).output, ensure_ascii=False, indent=2)
|
||||||
@ -564,7 +570,7 @@ def upload_and_parse():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
stat_logger.error("Mind map generation error:", traceback.format_exc())
|
stat_logger.error("Mind map generation error:", traceback.format_exc())
|
||||||
|
|
||||||
vects = embedding(doc_id, cks)
|
vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
|
||||||
assert len(cks) == len(vects)
|
assert len(cks) == len(vects)
|
||||||
for i, d in enumerate(cks):
|
for i, d in enumerate(cks):
|
||||||
v = vects[i]
|
v = vects[i]
|
||||||
@ -575,4 +581,4 @@ def upload_and_parse():
|
|||||||
DocumentService.increment_chunk_num(
|
DocumentService.increment_chunk_num(
|
||||||
doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
|
doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
|
||||||
|
|
||||||
return get_json_result(data=[d["id"] for d in files])
|
return get_json_result(data=[d["id"] for d,_ in files])
|
||||||
|
@ -46,6 +46,7 @@ class API4ConversationService(CommonService):
|
|||||||
@classmethod
|
@classmethod
|
||||||
@DB.connection_context()
|
@DB.connection_context()
|
||||||
def stats(cls, tenant_id, from_date, to_date, source=None):
|
def stats(cls, tenant_id, from_date, to_date, source=None):
|
||||||
|
if len(to_date) == 10: to_date += " 23:59:59"
|
||||||
return cls.model.select(
|
return cls.model.select(
|
||||||
cls.model.create_date.truncate("day").alias("dt"),
|
cls.model.create_date.truncate("day").alias("dt"),
|
||||||
peewee.fn.COUNT(
|
peewee.fn.COUNT(
|
||||||
|
@ -113,7 +113,7 @@ class MindMapExtractor:
|
|||||||
"children": [{"id": self._key(k), "children": self._be_children(v, keyset)} for k, v in
|
"children": [{"id": self._key(k), "children": self._be_children(v, keyset)} for k, v in
|
||||||
merge_json.items() if isinstance(v, dict) and self._key(k)]}
|
merge_json.items() if isinstance(v, dict) and self._key(k)]}
|
||||||
else:
|
else:
|
||||||
k = self._key(list(self._be_children.keys())[0])
|
k = self._key(list(merge_json.keys())[0])
|
||||||
merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], set([k]))}
|
merge_json = {"id": k, "children": self._be_children(list(merge_json.items())[0][1], set([k]))}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -61,9 +61,8 @@ class Docx(DocxParser):
|
|||||||
if pn > to_page:
|
if pn > to_page:
|
||||||
break
|
break
|
||||||
if from_page <= pn < to_page:
|
if from_page <= pn < to_page:
|
||||||
current_image = None
|
|
||||||
if p.text.strip():
|
if p.text.strip():
|
||||||
if p.style.name == 'Caption':
|
if p.style and p.style.name == 'Caption':
|
||||||
former_image = None
|
former_image = None
|
||||||
if lines and lines[-1][1] and lines[-1][2] != 'Caption':
|
if lines and lines[-1][1] and lines[-1][2] != 'Caption':
|
||||||
former_image = lines[-1][1].pop()
|
former_image = lines[-1][1].pop()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user