From 5aae73c230d453d033bdfab01c437129b27f3a83 Mon Sep 17 00:00:00 2001
From: dylan
Date: Mon, 14 Apr 2025 10:10:20 +0800
Subject: [PATCH] Make error messages during PPT processing clearer. (#6980)

### What problem does this PR solve?

Sometimes a slide may trigger a Proxy error (ArgumentException: Parameter is not valid) due to issues in the original file, and this error message can be confusing for users.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [x] Other (please describe):
---
 rag/app/presentation.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/rag/app/presentation.py b/rag/app/presentation.py
index fcfabd39..741470d5 100644
--- a/rag/app/presentation.py
+++ b/rag/app/presentation.py
@@ -36,11 +36,14 @@ class Ppt(PptParser):
         imgs = []
         with slides.Presentation(BytesIO(fnm)) as presentation:
             for i, slide in enumerate(presentation.slides[from_page: to_page]):
-                buffered = BytesIO()
-                slide.get_thumbnail(
-                    0.5, 0.5).save(
-                    buffered, drawing.imaging.ImageFormat.jpeg)
-                imgs.append(Image.open(buffered))
+                try:
+                    buffered = BytesIO()
+                    slide.get_thumbnail(
+                        0.5, 0.5).save(
+                        buffered, drawing.imaging.ImageFormat.jpeg)
+                    imgs.append(Image.open(buffered))
+                except RuntimeError as e:
+                    raise RuntimeError(f'ppt parse error at page {i+1}, original error: {str(e)}') from e
         assert len(imgs) == len(
             txts), "Slides text and image do not match: {} vs. {}".format(len(imgs), len(txts))
         callback(0.9, "Image extraction finished")