From 07983fdae4bed82497a1f10ea29e19ad6f00b999 Mon Sep 17 00:00:00 2001
From: chantskevin <chantskevin@gmail.com>
Date: Mon, 20 Apr 2026 16:04:54 +0800
Subject: [PATCH] =?UTF-8?q?video=5Fcompose:=20cuts=E2=86=92scenes=20adapte?=
 =?UTF-8?q?r=20for=20CinematicRenderer=20+=20dict=20profile?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three small fixes that matter for LLM-driven callers:

1. cuts → scenes adapter for CinematicRenderer
   The agent emits edit_decisions.cuts[] with a single consistent
   shape across every renderer_family. But the two Remotion compositions
   the tool targets read different props:
     - Explainer             reads props.cuts[]   {source, in_seconds, ...}
     - CinematicRenderer     reads props.scenes[] {src, kind:"video",
                                                   startSeconds,
                                                   durationSeconds, ...}
   When renderer_family (e.g. cinematic-trailer / documentary-montage)
   resolves to the CinematicRenderer composition and props.scenes[] is
   absent, _remotion_render now derives props.scenes[] from props.cuts[]
   (cumulative startSeconds; durationSeconds = out_seconds - in_seconds,
   falling back to duration / duration_s when out_seconds is missing;
   in_seconds→trimBeforeSeconds; optional tone/filter/fade hints
   preserved). props.cuts[] itself is left untouched.
   Without this, Cinematic compositions render 20–30s of pure black
   video with no error signal — scenes[] just defaults to [].

2. File-URI rewrite applied to scenes[] too
   The existing loop that converts absolute paths to file:// URIs only
   ran against cuts[].source. Scenes — whether produced by the adapter
   or supplied directly by the caller — need the same treatment on
   scenes[].src.

3. Accept dict-form profile as well as string name
   `profile` is documented as a string key from media_profiles.py
   (e.g. "youtube_landscape"), but LLM callers frequently pass
   {"width": 1280, "height": 720, "fps": 30}. get_profile(name) then
   raises TypeError: unhashable type: 'dict', which the existing
   except (ImportError, ValueError) does not catch, so the caller has
   no recourse.
   Accept both: dict → pass width/height straight to --width/--height
   (both must be present; other keys such as fps are ignored);
   string → the existing get_profile lookup path.

Note on the broader file-path limitation:
  Remotion's Chromium refuses `file:///` URLs for local-resource
  security ("Not allowed to load local resource"). Callers that pass
  absolute local paths still hit this — not regressed by this patch,
  just not solved. The long-term fix is serving local files over HTTP
  before invoking Remotion. In the meantime, callers with local files
  should front them with an HTTP server and pass the URL in cut.source.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tools/video/video_compose.py | 84 ++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 13 deletions(-)

diff --git a/tools/video/video_compose.py b/tools/video/video_compose.py
index 90d9959..00ca6e2 100644
--- a/tools/video/video_compose.py
+++ b/tools/video/video_compose.py
@@ -1306,15 +1306,68 @@ def _remotion_render(self, inputs: dict[str, Any]) -> ToolResult:
         # Deep-copy props so we don't mutate the original
         props = json.loads(json.dumps(composition_data))
 
+        # Resolve the target composition early so we can remap props to the
+        # shape that composition expects. Without this, Explainer-shaped
+        # `cuts[]` data handed to CinematicRenderer (which reads `scenes[]`)
+        # silently renders pure black — the scenes prop defaults to [].
+        renderer_family = (composition_data or {}).get("renderer_family", "explainer-data")
+        composition_id = self._get_composition_id(renderer_family)
+
+        # Adapter: the agent emits a single `cuts[]` shape across every
+        # renderer_family, but CinematicRenderer reads props.scenes[] with a
+        # different field layout. Translate cuts → scenes when we're
+        # targeting CinematicRenderer and the caller didn't already supply
+        # scenes. This keeps the edit-director's output schema uniform
+        # (edit_decisions.cuts) without hard-coding composition-specific
+        # shapes into the agent's skill prose.
+        if composition_id == "CinematicRenderer" and "scenes" not in props:
+            cursor = 0.0
+            scenes: list[dict[str, Any]] = []
+            for idx, cut in enumerate(props.get("cuts", [])):
+                in_s = float(cut.get("in_seconds") or cut.get("source_in_seconds") or 0)
+                out_s = cut.get("out_seconds")
+                if out_s is None:
+                    duration = float(cut.get("duration") or cut.get("duration_s") or 0)
+                else:
+                    duration = max(float(out_s) - in_s, 0.0)
+                scene: dict[str, Any] = {
+                    "id": cut.get("id") or f"scene-{idx}",
+                    "kind": "video",
+                    "src": cut.get("source") or cut.get("src") or "",
+                    "startSeconds": round(cursor, 3),
+                    "durationSeconds": round(duration, 3) if duration > 0 else 0,
+                }
+                if in_s > 0:
+                    scene["trimBeforeSeconds"] = in_s
+                for hint_src, hint_dst in (
+                    ("tone", "tone"),
+                    ("filter", "filter"),
+                    ("fade_in_frames", "fadeInFrames"),
+                    ("fade_out_frames", "fadeOutFrames"),
+                ):
+                    if hint_src in cut:
+                        scene[hint_dst] = cut[hint_src]
+                scenes.append(scene)
+                cursor += float(scene["durationSeconds"] or 0)
+            props["scenes"] = scenes
+
         # Convert absolute file paths to file:// URIs for Remotion's
-        # Img and OffthreadVideo components
-        for cut in props.get("cuts", []):
-            source = cut.get("source", "")
+        # Img and OffthreadVideo components. Apply to whichever prop shape
+        # the composition actually consumes.
+        def _resolve_path_to_uri(obj: dict[str, Any], field: str) -> None:
+            source = obj.get(field, "")
             if source and not source.startswith(("http://", "https://", "file://")):
                 resolved = Path(source).resolve()
                 if resolved.exists():
                     posix = resolved.as_posix()
-                    cut["source"] = f"file:///{posix}" if not posix.startswith("/") else f"file://{posix}"
+                    obj[field] = (
+                        f"file:///{posix}" if not posix.startswith("/") else f"file://{posix}"
+                    )
+
+        for cut in props.get("cuts", []):
+            _resolve_path_to_uri(cut, "source")
+        for scene in props.get("scenes", []):
+            _resolve_path_to_uri(scene, "src")
 
         # Build a custom themeConfig from the playbook's actual colors.
         # This ensures every video gets a unique visual identity derived
@@ -1342,11 +1395,8 @@ def _remotion_render(self, inputs: dict[str, Any]) -> ToolResult:
                 error=f"Remotion composer project not found at {composer_dir}",
             )
 
-        # Route to the correct Remotion composition based on renderer_family.
-        # This prevents all pipelines from collapsing into the Explainer visual grammar.
-        renderer_family = (composition_data or {}).get("renderer_family", "explainer-data")
-        composition_id = self._get_composition_id(renderer_family)
-
+        # composition_id is already resolved above (pre-props-rewrite) so the
+        # scenes/cuts adapter could target the right shape. Reuse it here.
         cmd = [
             "npx", "remotion", "render",
             str(composer_dir / "src" / "index.tsx"),
@@ -1355,12 +1405,20 @@ def _remotion_render(self, inputs: dict[str, Any]) -> ToolResult:
             "--props", str(props_path),
         ]
 
-        # Apply media profile dimensions
-        profile_name = inputs.get("profile")
-        if profile_name:
+        # Apply media profile dimensions. `profile` is documented as a string
+        # name (e.g. "youtube_landscape"), but LLM-driven callers sometimes
+        # pass a dict like {"width": 1280, "height": 720, "fps": 30}. Accept
+        # both rather than erroring with TypeError: unhashable type: 'dict'.
+        profile_input = inputs.get("profile")
+        if isinstance(profile_input, dict):
+            w = profile_input.get("width")
+            h = profile_input.get("height")
+            if w and h:
+                cmd.extend(["--width", str(w), "--height", str(h)])
+        elif isinstance(profile_input, str) and profile_input:
             try:
                 from lib.media_profiles import get_profile
-                p = get_profile(profile_name)
+                p = get_profile(profile_input)
                 cmd.extend(["--width", str(p.width), "--height", str(p.height)])
             except (ImportError, ValueError):
                 pass