From c93897b3dfa8dd1798ef2b071f603b196e2dc966 Mon Sep 17 00:00:00 2001 From: Maksym Lysak Date: Thu, 30 Jan 2025 11:14:39 +0100 Subject: [PATCH 1/2] Fix for the crash when encountering WMF images in pptx and docx backends on non Windows platforms Signed-off-by: Maksym Lysak --- docling/backend/mspowerpoint_backend.py | 9 ++++----- docling/backend/msword_backend.py | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 995969d42..aecebdc06 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -271,13 +271,12 @@ def handle_title(self, shape, parent_slide, slide_ind, doc): return def handle_pictures(self, shape, parent_slide, slide_ind, doc): - # Get the image bytes - image = shape.image - image_bytes = image.blob - im_dpi, _ = image.dpi - # Open it with PIL try: + # Get the image bytes + image = shape.image + image_bytes = image.blob + im_dpi, _ = image.dpi pil_image = Image.open(BytesIO(image_bytes)) # shape has picture diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index f8148d525..0af3db572 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -520,11 +520,11 @@ def get_docx_image(element, drawing_blip): image_data = image_part.blob # Get the binary image data return image_data - image_data = get_docx_image(element, drawing_blip) - image_bytes = BytesIO(image_data) level = self.get_level() # Open the BytesIO object with PIL to create an Image try: + image_data = get_docx_image(element, drawing_blip) + image_bytes = BytesIO(image_data) pil_image = Image.open(image_bytes) doc.add_picture( parent=self.parents[level - 1], From 3224df9fdae8619b99bf7c46f4540869bef63dbc Mon Sep 17 00:00:00 2001 From: Maksym Lysak Date: Thu, 30 Jan 2025 14:25:43 +0100 Subject: [PATCH 2/2] Updated faq Signed-off-by: Maksym Lysak --- docs/faq.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/faq.md b/docs/faq.md index 96e12ed72..d8b85cdaa 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -151,3 +151,11 @@ This is a collection of FAQ collected from the user questions on