Remove page number exception for pdf parser (#424)

### What problem does this PR solve?

#423

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-07-31 00:12:00 +08:00 · 2024-04-18 12:09:56 +08:00 · 2024-04-18 12:09:56 +08:00 · 0499a3f621
commit 0499a3f621
parent 453c29170f
1 changed file with 1 addition and 0 deletions
--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@@ -830,6 +830,7 @@ class HuParser:
        pn = [bx["page_number"]]
        top = bx["top"] - self.page_cum_height[pn[0] - 1]
        bott = bx["bottom"] - self.page_cum_height[pn[0] - 1]
+        if pn[-1] - 1 >= len(self.page_images): return ""
        while bott * ZM > self.page_images[pn[-1] - 1].size[1]:
            bott -= self.page_images[pn[-1] - 1].size[1] / ZM
            pn.append(pn[-1] + 1)