Catch the exception while parsing pptx. (#4202)

### What problem does this PR solve?
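Fixes #4189: an exception raised while extracting text from a single shape is now caught and logged, so the remaining shapes on the slide are still processed.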

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu 2024-12-24 10:49:28 +08:00 committed by GitHub
parent d030b4a680
commit 76cd23eecf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -10,7 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from io import BytesIO
from pptx import Presentation
@@ -53,9 +53,12 @@ class RAGFlowPptParser(object):
texts = []
for shape in sorted(
slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left)):
txt = self.__extract(shape)
if txt:
texts.append(txt)
try:
txt = self.__extract(shape)
if txt:
texts.append(txt)
except Exception as e:
logging.exception(e)
txts.append("\n".join(texts))
return txts