Skip to content

Add PDF image optimization #229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ asn1crypto>=1.5.0
cryptography>=41.0.0
datadog==0.51.*
lzfse>=0.4.2
pypdf>=5.0.0
139 changes: 112 additions & 27 deletions scripts/test_image_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import List, NamedTuple

import pillow_heif # type: ignore
from pypdf import PdfReader, PdfWriter
from PIL import Image

# Register HEIF support
Expand Down Expand Up @@ -85,6 +86,36 @@ def test_heic_conversion(img: Image.Image, file_size: int) -> OptimizationResult
return None


def test_pdf_optimization(pdf_path: Path, file_size: int) -> OptimizationResult | None:
    """Measure how much a lossless pypdf re-write would shrink a PDF.

    Each page is copied into a fresh ``PdfWriter``, its content streams are
    re-compressed with ``level=9``, and identical/orphaned objects are removed.
    The rewritten document is serialised to an in-memory buffer only; nothing
    is written to disk.

    Args:
        pdf_path: Path of the PDF to analyse.
        file_size: Size of the original file in bytes, used as the baseline.

    Returns:
        An ``OptimizationResult`` with method ``"optimized_PDF"`` when the
        rewritten document is strictly smaller than ``file_size``; ``None``
        when there is no gain or when processing fails (the error is printed).
    """
    try:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        for page in reader.pages:
            # Compress the writer-owned copy returned by add_page(): pypdf can
            # only swap the content stream of a page that already belongs to a
            # PdfWriter and raises ValueError for reader-owned pages.
            writer.add_page(page).compress_content_streams(level=9)

        writer.compress_identical_objects(remove_identicals=True, remove_orphans=True)

        with io.BytesIO() as buf:
            writer.write(buf)
            # The stream position after writing equals the serialised size.
            new_size = buf.tell()

        if new_size < file_size:
            savings = file_size - new_size
            return OptimizationResult(
                original_size=file_size,
                optimized_size=new_size,
                savings=savings,
                savings_percent=(savings / file_size) * 100,
                method="optimized_PDF",
            )
    except Exception as e:
        # Best-effort analysis: a malformed PDF must not abort the whole run.
        print(f" ❌ PDF optimization failed: {e}")
    return None


def save_optimized_image(img: Image.Image, output_path: Path, method: str) -> None:
"""Save the optimized image to disk."""
try:
Expand All @@ -99,17 +130,71 @@ def save_optimized_image(img: Image.Image, output_path: Path, method: str) -> No
print(f" ❌ Failed to save optimized image: {e}")


def process_image(image_path: Path, output_dir: Path) -> None:
"""Process a single image and save optimized versions."""
print(f"\n📸 Processing: {image_path.name}")
def save_optimized_pdf(pdf_path: Path, output_path: Path) -> None:
    """Write a losslessly re-compressed copy of a PDF to ``output_path``.

    Each page of ``pdf_path`` is copied into a fresh ``PdfWriter``, its content
    streams are re-compressed with ``level=9``, and identical/orphaned objects
    are removed before the document is written out.

    Args:
        pdf_path: Path of the source PDF.
        output_path: Destination file; opened in binary write mode.

    Returns:
        None. Failures are printed rather than raised.
    """
    try:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        for page in reader.pages:
            # Compress the writer-owned copy returned by add_page(): pypdf can
            # only swap the content stream of a page that already belongs to a
            # PdfWriter and raises ValueError for reader-owned pages.
            writer.add_page(page).compress_content_streams(level=9)

        writer.compress_identical_objects(remove_identicals=True, remove_orphans=True)

        with open(output_path, 'wb') as f:
            writer.write(f)
    except Exception as e:
        # Best-effort: report the problem and let the caller continue.
        print(f" ❌ Failed to save optimized PDF: {e}")


def process_file(file_path: Path, output_dir: Path) -> None:
"""Process a single file (image or PDF) and save optimized versions."""
print(f"\n📄 Processing: {file_path.name}")

try:
file_size = image_path.stat().st_size
file_size = file_path.stat().st_size
print(f" 📊 Original size: {format_size(file_size)}")

with Image.open(image_path) as img:
file_ext = file_path.suffix.lower()

# Handle PDF files
if file_ext == '.pdf':
print(f" 🏷️ Format: PDF")
results: List[OptimizationResult] = []

if result := test_pdf_optimization(file_path, file_size):
results.append(result)

if not results:
print(" ℹ️ No optimization opportunities found")
return

# Show results
print(" 💡 Optimization opportunities:")
for i, result in enumerate(results, 1):
print(f" {i}. {result.method}: "
f"{format_size(result.savings)} saved "
f"({result.savings_percent:.1f}%) "
f"→ {format_size(result.optimized_size)}")

# Save optimized PDF
best_result = results[0]
if best_result.savings >= 4096: # 4KB threshold
output_path = output_dir / f"{file_path.stem}_optimized.pdf"
save_optimized_pdf(file_path, output_path)
print(f" ✅ Saved optimized version: {output_path.name}")
print(f" 🎯 Savings: {format_size(best_result.savings)} "
f"({best_result.savings_percent:.1f}%)")
else:
print(f" ⚠️ Savings ({format_size(best_result.savings)}) "
f"below 4KB threshold")
return

# Handle image files
with Image.open(file_path) as img:
img.load() # type: ignore
fmt = (img.format or image_path.suffix[1:]).lower()
fmt = (img.format or file_path.suffix[1:]).lower()
print(f" 🏷️ Format: {fmt.upper()}, Mode: {img.mode}, Size: {img.size}")

results: List[OptimizationResult] = []
Expand Down Expand Up @@ -145,13 +230,13 @@ def process_image(image_path: Path, output_dir: Path) -> None:
# Save the best optimization
best_result = max(results, key=lambda r: r.savings)
if best_result.savings >= 4096: # 4KB threshold
stem = image_path.stem
stem = file_path.stem
if best_result.method == "HEIC":
output_path = output_dir / f"{stem}_optimized.heic"
elif best_result.method.startswith("minified_"):
output_path = output_dir / f"{stem}_optimized{image_path.suffix}"
output_path = output_dir / f"{stem}_optimized{file_path.suffix}"
else:
output_path = output_dir / f"{stem}_optimized{image_path.suffix}"
output_path = output_dir / f"{stem}_optimized{file_path.suffix}"

save_optimized_image(img, output_path, best_result.method)
print(f" ✅ Saved optimized version: {output_path.name}")
Expand All @@ -162,18 +247,18 @@ def process_image(image_path: Path, output_dir: Path) -> None:
f"below 4KB threshold")

except Exception as e:
print(f" ❌ Failed to process image: {e}")
print(f" ❌ Failed to process file: {e}")


def main():
"""Main function."""
if len(sys.argv) != 2:
print("Usage: python test_image_optimization.py <directory_or_image_path>")
print("Usage: python test_image_optimization.py <directory_or_file_path>")
print("\nThis script will:")
print("- Analyze images for optimization opportunities")
print("- Analyze images and PDFs for optimization opportunities")
print("- Save optimized versions with '_optimized' suffix")
print("- Show before/after file sizes and savings")
print("- Allow you to visually compare original vs optimized")
print("- Allow you to compare original vs optimized files")
sys.exit(1)

input_path = Path(sys.argv[1])
Expand All @@ -183,35 +268,35 @@ def main():
sys.exit(1)

# Collect image files
image_extensions = {'.png', '.jpg', '.jpeg', '.heif', '.heic'}
image_extensions = {'.png', '.jpg', '.jpeg', '.heif', '.heic', '.pdf'}

if input_path.is_file():
if input_path.suffix.lower() not in image_extensions:
print(f"❌ Not a supported image file: {input_path}")
print(f"❌ Not a supported file: {input_path}")
sys.exit(1)
image_files = [input_path]
files = [input_path]
output_dir = input_path.parent
else:
image_files: List[Path] = []
files: List[Path] = []
for ext in image_extensions:
image_files.extend(input_path.glob(f"*{ext}"))
image_files.extend(input_path.glob(f"*{ext.upper()}"))
files.extend(input_path.glob(f"*{ext}"))
files.extend(input_path.glob(f"*{ext.upper()}"))

if not image_files:
print(f"❌ No supported image files found in: {input_path}")
if not files:
print(f"❌ No supported files found in: {input_path}")
sys.exit(1)
output_dir = input_path

print(f"🔍 Found {len(image_files)} image(s) to process")
print(f"🔍 Found {len(files)} file(s) to process")
print(f"📁 Output directory: {output_dir}")

# Process each image
for image_file in sorted(image_files):
process_image(image_file, output_dir)
# Process each file
for file in sorted(files):
process_file(file, output_dir)

print(f"\n✨ Processing complete!")
print(f"📂 Check {output_dir} for optimized images with '_optimized' suffix")
print("👀 Open original and optimized images side-by-side to compare quality")
print(f"📂 Check {output_dir} for optimized files with '_optimized' suffix")
print("👀 Open original and optimized files side-by-side to compare quality")


if __name__ == "__main__":
Expand Down
35 changes: 32 additions & 3 deletions src/launchpad/size/insights/apple/image_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pillow_heif # type: ignore

from PIL import Image
from pypdf import PdfReader, PdfWriter

from launchpad.size.insights.insight import Insight, InsightsInput
from launchpad.size.models.common import FileInfo
Expand Down Expand Up @@ -38,7 +39,7 @@ class _OptimizationResult:
class ImageOptimizationInsight(Insight[ImageOptimizationInsightResult]):
"""Analyse image optimisation opportunities in iOS apps."""

OPTIMIZABLE_FORMATS = {"png", "jpg", "jpeg", "heif", "heic"}
OPTIMIZABLE_FORMATS = {"png", "jpg", "jpeg", "heif", "heic", "pdf"}
MIN_SAVINGS_THRESHOLD = 4096
TARGET_JPEG_QUALITY = 85
TARGET_HEIC_QUALITY = 85
Expand Down Expand Up @@ -104,8 +105,16 @@ def _analyze_image_optimization(
if res := self._check_heic_minification(img, file_size):
minify_savings, minified_size = res.savings, res.optimized_size
except Exception as exc:
logger.error("Failed to process %s: %s", display_path, exc)
return None
if file_type.lower() == "pdf":
try:
if res := self._check_pdf_optimization(full_path, file_size):
minify_savings, minified_size = res.savings, res.optimized_size
except Exception as pdf_exc:
logger.error("Failed to process PDF %s: %s", display_path, pdf_exc)
return None
else:
logger.error("Failed to process %s: %s", display_path, exc)
return None

if max(minify_savings, conversion_savings) < self.MIN_SAVINGS_THRESHOLD:
return None
Expand Down Expand Up @@ -154,6 +163,26 @@ def _check_heic_minification(self, img: Image.Image, file_size: int) -> _Optimiz
logger.error("HEIC minification check failed: %s", exc)
return None

def _check_pdf_optimization(self, file_path: Path, file_size: int) -> _OptimizationResult | None:
    """Estimate the savings from losslessly re-writing a PDF with pypdf.

    Each page is copied into a fresh ``PdfWriter``, its content streams are
    re-compressed with ``level=9``, and identical/orphaned objects are removed.
    The result is serialised to memory only to measure its size.

    Args:
        file_path: Path of the PDF to analyse.
        file_size: Size of the original file in bytes, used as the baseline.

    Returns:
        ``_OptimizationResult(savings, new_size)`` when the rewritten document
        is strictly smaller than ``file_size``; ``None`` when there is no gain
        or when processing fails (the error is logged).
    """
    try:
        reader = PdfReader(file_path)
        writer = PdfWriter()

        for page in reader.pages:
            # Compress the writer-owned copy returned by add_page(): pypdf can
            # only swap the content stream of a page that already belongs to a
            # PdfWriter and raises ValueError for reader-owned pages.
            writer.add_page(page).compress_content_streams(level=9)

        writer.compress_identical_objects(remove_identicals=True, remove_orphans=True)

        with io.BytesIO() as buf:
            writer.write(buf)
            # The stream position after writing equals the serialised size.
            new_size = buf.tell()

        if new_size < file_size:
            return _OptimizationResult(file_size - new_size, new_size)
        return None
    except Exception as exc:
        logger.error("PDF optimization check failed: %s", exc)
        return None

def _iter_optimizable_files(self, files: Sequence[FileInfo]) -> Iterable[FileInfo]:
for fi in files:
if fi.file_type == "car":
Expand Down
Loading