Make error messages during PPT processing clearer. (#6980)

### What problem does this PR solve?

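Sometimes a slide triggers a Proxy error (`ArgumentException:
Parameter is not valid`) because of problems in the original file, and
the raw error message is confusing for users. This PR catches the
`RuntimeError` raised while rendering a slide thumbnail and re-raises it
with the 1-based position of the failing slide within the processed page
range, followed by the original error text.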

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [x] Other (please describe): clearer error message when rendering a slide thumbnail fails during PPT parsing
This commit is contained in:
dylan 2025-04-14 10:10:20 +08:00 committed by GitHub
parent b578451e6a
commit 5aae73c230
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -36,11 +36,14 @@ class Ppt(PptParser):
imgs = [] imgs = []
with slides.Presentation(BytesIO(fnm)) as presentation: with slides.Presentation(BytesIO(fnm)) as presentation:
for i, slide in enumerate(presentation.slides[from_page: to_page]): for i, slide in enumerate(presentation.slides[from_page: to_page]):
try:
buffered = BytesIO() buffered = BytesIO()
slide.get_thumbnail( slide.get_thumbnail(
0.5, 0.5).save( 0.5, 0.5).save(
buffered, drawing.imaging.ImageFormat.jpeg) buffered, drawing.imaging.ImageFormat.jpeg)
imgs.append(Image.open(buffered)) imgs.append(Image.open(buffered))
except RuntimeError as e:
raise RuntimeError(f'ppt parse error at page {i+1}, original error: {str(e)}') from e
assert len(imgs) == len( assert len(imgs) == len(
txts), "Slides text and image do not match: {} vs. {}".format(len(imgs), len(txts)) txts), "Slides text and image do not match: {} vs. {}".format(len(imgs), len(txts))
callback(0.9, "Image extraction finished") callback(0.9, "Image extraction finished")