use ffmpeg to generate waveform images for audio thumbnailsources

bornhack · Dec 18, 2024 · ca4b9d6 · ca4b9d6
1 parent 9835734
commit ca4b9d6
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 0 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -13,6 +13,7 @@ classifiers = [
 ]
 dependencies = [
     "exifread==3.0.0",
+    "ffmpeg-python==0.2.0",
     "httpx==0.27.2",
     "opencv-python==4.10.0.84",
     "pillow==11.0.0",

diff --git a/src/bma_client_lib/bma_client.py b/src/bma_client_lib/bma_client.py
@@ -13,6 +13,7 @@
 
 import cv2
 import exifread
+import ffmpeg
 import httpx
 import magic
 import pymupdf
@@ -186,6 +187,17 @@ def _handle_thumbnail_source_job(self, job: ThumbnailSourceJob, fileinfo: dict[s
             exif[0x131] = self.clientinfo["client_version"]
             job.exif = exif
             return
+        if fileinfo["filetype"] == "audio":
+            # use ffmpeg to generate a picture of the waveform
+            ss = self._get_audio_screenshot(job=job)
+            job.images = [ss]
+            exif = Image.Exif()
+            exif[0x100] = job.images[0].width
+            exif[0x101] = job.images[0].height
+            exif[0x10E] = f"ThumbnailSource for BMA audio file {job.basefile_uuid}"
+            exif[0x131] = self.clientinfo["client_version"]
+            job.exif = exif
+            return
 
         # unsupported filetype
         raise JobNotSupportedError(job=job)
@@ -217,6 +229,19 @@ def _get_document_screenshot(self, job: ThumbnailSourceJob, page: int = 0) -> Im
         pix = pdfpage.get_pixmap()
         return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
 
+    def _get_audio_screenshot(self, job: ThumbnailSourceJob) -> Image.Image:
+        """Get a waveform screenshot. Requires ffmpeg as external dep."""
+        path = self.path / job.source_url[1:]  # [1:] drops the first char of source_url (presumably a leading "/" - confirm)
+        output, _ = (  # run() result is unpacked as a pair; only the first item (captured stdout) is used
+            ffmpeg.input(str(path))
+            .filter("compand")  # applied before the waveform is drawn; NOTE(review): presumably to keep quiet audio visible - confirm
+            .filter("showwavespic", s="1000x1000", split_channels=1)  # 1000x1000 waveform picture with split_channels enabled
+            .output("pipe:", format="webp", vframes="1")  # one webp frame, written to a pipe rather than a file
+            # .overwrite_output()
+            .run(capture_stdout=True)  # capture ffmpeg's stdout so the encoded image bytes come back to Python
+        )
+        return Image.open(BytesIO(output))  # wrap the captured bytes in a file-like object and open them as an image
+
     ###############################################################################
 
     def _write_and_upload_result(self, job: Job, filename: str) -> None: