mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-03 10:53:59 +08:00
Fix: order chunks from docx by positions. (#7979)
### What problem does this PR solve? #7934 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
9f38b22a3f
commit
93f5df716f
@ -279,12 +279,13 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
|
||||
def tokenize_chunks_with_images(chunks, doc, eng, images):
|
||||
res = []
|
||||
# wrap up as es documents
|
||||
for ck, image in zip(chunks, images):
|
||||
for ii, (ck, image) in enumerate(zip(chunks, images)):
|
||||
if len(ck.strip()) == 0:
|
||||
continue
|
||||
logging.debug("-- {}".format(ck))
|
||||
d = copy.deepcopy(doc)
|
||||
d["image"] = image
|
||||
add_positions(d, [[ii]*5])
|
||||
tokenize(d, ck, eng)
|
||||
res.append(d)
|
||||
return res
|
||||
|
Loading…
x
Reference in New Issue
Block a user