-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpptx_processor.py
More file actions
22 lines (17 loc) · 866 Bytes
/
Copy pathpptx_processor.py
File metadata and controls
22 lines (17 loc) · 866 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from pptx import Presentation
from io import BytesIO
def extract_text_from_pptx(file_content: bytes) -> dict:
    """Extract the text of every slide from an in-memory PowerPoint file.

    Args:
        file_content: Raw bytes of the presentation file; they are wrapped in
            a ``BytesIO`` and handed to ``Presentation``.

    Returns:
        A dict keyed by ``"slide_<n>"`` (``n`` starts at 1). Each value is a
        dict with:

        * ``"title"``: stripped text of the slide's first shape, or ``""``
          when that shape has no text.
        * ``"content"``: stripped text of all remaining text-bearing shapes,
          joined with single spaces.

        Slides that contain no non-blank text are omitted from the result.
    """
    prs = Presentation(BytesIO(file_content))
    slides_text = {}
    for slide_index, slide in enumerate(prs.slides, start=1):
        title = ""
        content = []
        # enumerate() yields each shape's position directly, avoiding a
        # linear ``slide.shapes.index(shape)`` search per shape.
        for position, shape in enumerate(slide.shapes):
            # Not every shape exposes ``text``; treat a missing or empty
            # value as "no text".
            text = (getattr(shape, "text", None) or "").strip()
            if not text:
                continue
            if position == 0:
                title = text
            else:
                # Collected even when the first shape had no text. Indexing
                # the not-yet-created slide entry here used to raise KeyError.
                content.append(text)
        if title or content:
            slides_text[f"slide_{slide_index}"] = {
                "title": title,
                "content": " ".join(content),
            }
    return slides_text