mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-07-31 00:02:02 +08:00
Remove page number exception for PDF parser (#424)
### What problem does this PR solve?

#423

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
453c29170f
commit
0499a3f621
@ -830,6 +830,7 @@ class HuParser:
|
||||
pn = [bx["page_number"]]
|
||||
top = bx["top"] - self.page_cum_height[pn[0] - 1]
|
||||
bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
|
||||
if pn[-1] - 1 >= len(self.page_images): return ""
|
||||
while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
|
||||
bott -= self.page_images[pn[-1] - 1].size[1] / ZM
|
||||
pn.append(pn[-1] + 1)
|
||||
|
Loading…
x
Reference in New Issue
Block a user