diff --git a/requirements.txt b/requirements.txt index c135af92..341c126c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ asn1crypto>=1.5.0 cryptography>=41.0.0 datadog==0.51.* lzfse>=0.4.2 +pypdf>=5.0.0 diff --git a/scripts/test_image_optimization.py b/scripts/test_image_optimization.py index cf25d7fb..dde6732f 100755 --- a/scripts/test_image_optimization.py +++ b/scripts/test_image_optimization.py @@ -11,6 +11,7 @@ from typing import List, NamedTuple import pillow_heif # type: ignore +from pypdf import PdfReader, PdfWriter from PIL import Image # Register HEIF support @@ -85,6 +86,36 @@ def test_heic_conversion(img: Image.Image, file_size: int) -> OptimizationResult return None +def test_pdf_optimization(pdf_path: Path, file_size: int) -> OptimizationResult | None: + """Test PDF optimization.""" + try: + reader = PdfReader(pdf_path) + writer = PdfWriter() + + for page in reader.pages: + page.compress_content_streams(level=9) + writer.add_page(page) + + writer.compress_identical_objects(remove_identicals=True, remove_orphans=True) + + with io.BytesIO() as buf: + writer.write(buf) + new_size = buf.tell() + + if new_size < file_size: + savings = file_size - new_size + return OptimizationResult( + original_size=file_size, + optimized_size=new_size, + savings=savings, + savings_percent=(savings / file_size) * 100, + method="optimized_PDF" + ) + except Exception as e: + print(f" āŒ PDF optimization failed: {e}") + return None + + def save_optimized_image(img: Image.Image, output_path: Path, method: str) -> None: """Save the optimized image to disk.""" try: @@ -99,17 +130,71 @@ def save_optimized_image(img: Image.Image, output_path: Path, method: str) -> No print(f" āŒ Failed to save optimized image: {e}") -def process_image(image_path: Path, output_dir: Path) -> None: - """Process a single image and save optimized versions.""" - print(f"\nšŸ“ø Processing: {image_path.name}") +def save_optimized_pdf(pdf_path: Path, output_path: Path) -> None: + """Save the optimized PDF to disk.""" + try: + reader = PdfReader(pdf_path) + writer = PdfWriter() + + for page in reader.pages: + page.compress_content_streams(level=9) + writer.add_page(page) + + writer.compress_identical_objects(remove_identicals=True, remove_orphans=True) + + with open(output_path, 'wb') as f: + writer.write(f) + except Exception as e: + print(f" āŒ Failed to save optimized PDF: {e}") + + +def process_file(file_path: Path, output_dir: Path) -> None: + """Process a single file (image or PDF) and save optimized versions.""" + print(f"\nšŸ“„ Processing: {file_path.name}") try: - file_size = image_path.stat().st_size + file_size = file_path.stat().st_size print(f" šŸ“Š Original size: {format_size(file_size)}") - with Image.open(image_path) as img: + file_ext = file_path.suffix.lower() + + # Handle PDF files + if file_ext == '.pdf': + print(f" šŸ·ļø Format: PDF") + results: List[OptimizationResult] = [] + + if result := test_pdf_optimization(file_path, file_size): + results.append(result) + + if not results: + print(" ā„¹ļø No optimization opportunities found") + return + + # Show results + print(" šŸ’” Optimization opportunities:") + for i, result in enumerate(results, 1): + print(f" {i}. {result.method}: " + f"{format_size(result.savings)} saved " + f"({result.savings_percent:.1f}%) " + f"→ {format_size(result.optimized_size)}") + + # Save optimized PDF + best_result = results[0] + if best_result.savings >= 4096: # 4KB threshold + output_path = output_dir / f"{file_path.stem}_optimized.pdf" + save_optimized_pdf(file_path, output_path) + print(f" āœ… Saved optimized version: {output_path.name}") + print(f" šŸŽÆ Savings: {format_size(best_result.savings)} " + f"({best_result.savings_percent:.1f}%)") + else: + print(f" āš ļø Savings ({format_size(best_result.savings)}) " + f"below 4KB threshold") + return + + # Handle image files + with Image.open(file_path) as img: img.load() # type: ignore - fmt = (img.format or image_path.suffix[1:]).lower() + fmt = (img.format or file_path.suffix[1:]).lower() print(f" šŸ·ļø Format: {fmt.upper()}, Mode: {img.mode}, Size: {img.size}") results: List[OptimizationResult] = [] @@ -145,13 +230,13 @@ def process_image(image_path: Path, output_dir: Path) -> None: # Save the best optimization best_result = max(results, key=lambda r: r.savings) if best_result.savings >= 4096: # 4KB threshold - stem = image_path.stem + stem = file_path.stem if best_result.method == "HEIC": output_path = output_dir / f"{stem}_optimized.heic" elif best_result.method.startswith("minified_"): - output_path = output_dir / f"{stem}_optimized{image_path.suffix}" + output_path = output_dir / f"{stem}_optimized{file_path.suffix}" else: - output_path = output_dir / f"{stem}_optimized{image_path.suffix}" + output_path = output_dir / f"{stem}_optimized{file_path.suffix}" save_optimized_image(img, output_path, best_result.method) print(f" āœ… Saved optimized version: {output_path.name}") @@ -162,18 +247,18 @@ def process_image(image_path: Path, output_dir: Path) -> None: f"below 4KB threshold") except Exception as e: - print(f" āŒ Failed to process image: {e}") + print(f" āŒ Failed to process file: {e}") def main(): """Main function.""" if len(sys.argv) != 2: - print("Usage: python test_image_optimization.py ") + print("Usage: python test_image_optimization.py ") print("\nThis script will:") - print("- Analyze images for optimization opportunities") + print("- Analyze images and PDFs for optimization opportunities") print("- Save optimized versions with '_optimized' suffix") print("- Show before/after file sizes and savings") - print("- Allow you to visually compare original vs optimized") + print("- Allow you to compare original vs optimized files") sys.exit(1) input_path = Path(sys.argv[1]) @@ -183,35 +268,35 @@ def main(): sys.exit(1) # Collect image files - image_extensions = {'.png', '.jpg', '.jpeg', '.heif', '.heic'} + image_extensions = {'.png', '.jpg', '.jpeg', '.heif', '.heic', '.pdf'} if input_path.is_file(): if input_path.suffix.lower() not in image_extensions: - print(f"āŒ Not a supported image file: {input_path}") + print(f"āŒ Not a supported file: {input_path}") sys.exit(1) - image_files = [input_path] + files = [input_path] output_dir = input_path.parent else: - image_files: List[Path] = [] + files: List[Path] = [] for ext in image_extensions: - image_files.extend(input_path.glob(f"*{ext}")) - image_files.extend(input_path.glob(f"*{ext.upper()}")) + files.extend(input_path.glob(f"*{ext}")) + files.extend(input_path.glob(f"*{ext.upper()}")) - if not image_files: - print(f"āŒ No supported image files found in: {input_path}") + if not files: + print(f"āŒ No supported files found in: {input_path}") sys.exit(1) output_dir = input_path - print(f"šŸ” Found {len(image_files)} image(s) to process") + print(f"šŸ” Found {len(files)} file(s) to process") print(f"šŸ“ Output directory: {output_dir}") - # Process each image - for image_file in sorted(image_files): - process_image(image_file, output_dir) + # Process each file + for file in sorted(files): + process_file(file, output_dir) print(f"\n✨ Processing complete!") - print(f"šŸ“‚ Check {output_dir} for optimized images with '_optimized' suffix") - print("šŸ‘€ Open original and optimized images side-by-side to compare quality") + print(f"šŸ“‚ Check {output_dir} for optimized files with '_optimized' suffix") + print("šŸ‘€ Open original and optimized files side-by-side to compare quality") if __name__ == "__main__": diff --git a/src/launchpad/size/insights/apple/image_optimization.py b/src/launchpad/size/insights/apple/image_optimization.py index bb604831..8851129a 100644 --- a/src/launchpad/size/insights/apple/image_optimization.py +++ b/src/launchpad/size/insights/apple/image_optimization.py @@ -11,6 +11,7 @@ import pillow_heif # type: ignore from PIL import Image +from pypdf import PdfReader, PdfWriter from launchpad.size.insights.insight import Insight, InsightsInput from launchpad.size.models.common import FileInfo @@ -38,7 +39,7 @@ class _OptimizationResult: class ImageOptimizationInsight(Insight[ImageOptimizationInsightResult]): """Analyse image optimisation opportunities in iOS apps.""" - OPTIMIZABLE_FORMATS = {"png", "jpg", "jpeg", "heif", "heic"} + OPTIMIZABLE_FORMATS = {"png", "jpg", "jpeg", "heif", "heic", "pdf"} MIN_SAVINGS_THRESHOLD = 4096 TARGET_JPEG_QUALITY = 85 TARGET_HEIC_QUALITY = 85 @@ -104,8 +105,16 @@ def _analyze_image_optimization( if res := self._check_heic_minification(img, file_size): minify_savings, minified_size = res.savings, res.optimized_size except Exception as exc: - logger.error("Failed to process %s: %s", display_path, exc) - return None + if file_type.lower() == "pdf": + try: + if res := self._check_pdf_optimization(full_path, file_size): + minify_savings, minified_size = res.savings, res.optimized_size + except Exception as pdf_exc: + logger.error("Failed to process PDF %s: %s", display_path, pdf_exc) + return None + else: + logger.error("Failed to process %s: %s", display_path, exc) + return None if max(minify_savings, conversion_savings) < self.MIN_SAVINGS_THRESHOLD: return None @@ -154,6 +163,26 @@ def _check_heic_minification(self, img: Image.Image, file_size: int) -> _Optimiz logger.error("HEIC minification check failed: %s", exc) return None + def _check_pdf_optimization(self, file_path: Path, file_size: int) -> _OptimizationResult | None: + try: + reader = PdfReader(file_path) + writer = PdfWriter() + + for page in reader.pages: + page.compress_content_streams(level=9) + writer.add_page(page) + + writer.compress_identical_objects(remove_identicals=True, remove_orphans=True) + + with io.BytesIO() as buf: + writer.write(buf) + new_size = buf.tell() + + return _OptimizationResult(file_size - new_size, new_size) if new_size < file_size else None + except Exception as exc: + logger.error("PDF optimization check failed: %s", exc) + return None + def _iter_optimizable_files(self, files: Sequence[FileInfo]) -> Iterable[FileInfo]: for fi in files: if fi.file_type == "car":