mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-04 11:24:00 +08:00
Fix: order chunks from docx by positions. (#7979)
### What problem does this PR solve?

#7934

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
9f38b22a3f
commit
93f5df716f
@@ -279,12 +279,13 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
|
|||||||
def tokenize_chunks_with_images(chunks, doc, eng, images):
|
def tokenize_chunks_with_images(chunks, doc, eng, images):
|
||||||
res = []
|
res = []
|
||||||
# wrap up as es documents
|
# wrap up as es documents
|
||||||
for ck, image in zip(chunks, images):
|
for ii, (ck, image) in enumerate(zip(chunks, images)):
|
||||||
if len(ck.strip()) == 0:
|
if len(ck.strip()) == 0:
|
||||||
continue
|
continue
|
||||||
logging.debug("-- {}".format(ck))
|
logging.debug("-- {}".format(ck))
|
||||||
d = copy.deepcopy(doc)
|
d = copy.deepcopy(doc)
|
||||||
d["image"] = image
|
d["image"] = image
|
||||||
|
add_positions(d, [[ii]*5])
|
||||||
tokenize(d, ck, eng)
|
tokenize(d, ck, eng)
|
||||||
res.append(d)
|
res.append(d)
|
||||||
return res
|
return res
|
||||||
|
Loading…
x
Reference in New Issue
Block a user