mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-20 05:00:01 +08:00
Feat: text file support position retaining. (#6231)
### What problem does this PR solve?

#5832

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
parent
6e5cbd0196
commit
a087d13ccb
@ -258,7 +258,7 @@ def tokenize(d, t, eng):
|
||||
def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
|
||||
res = []
|
||||
# wrap up as es documents
|
||||
for ck in chunks:
|
||||
for ii, ck in enumerate(chunks):
|
||||
if len(ck.strip()) == 0:
|
||||
continue
|
||||
logging.debug("-- {}".format(ck))
|
||||
@ -270,6 +270,8 @@ def tokenize_chunks(chunks, doc, eng, pdf_parser=None):
|
||||
ck = pdf_parser.remove_tag(ck)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
else:
|
||||
add_positions(d, [[ii]*5])
|
||||
tokenize(d, ck, eng)
|
||||
res.append(d)
|
||||
return res
|
||||
|
Loading…
x
Reference in New Issue
Block a user