From 07983fdae4bed82497a1f10ea29e19ad6f00b999 Mon Sep 17 00:00:00 2001 From: chantskevin Date: Mon, 20 Apr 2026 16:04:54 +0800 Subject: [PATCH] =?UTF-8?q?video=5Fcompose:=20cuts=E2=86=92scenes=20adapte?= =?UTF-8?q?r=20for=20CinematicRenderer=20+=20dict=20profile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small fixes that matter for LLM-driven callers: 1. cuts → scenes adapter for CinematicRenderer The agent emits edit_decisions.cuts[] with a single consistent shape across every renderer_family. But the two Remotion compositions the tool targets read different props: - Explainer reads props.cuts[] {source, in_seconds, ...} - CinematicRenderer reads props.scenes[] {src, kind:"video", startSeconds, durationSeconds, ...} When renderer_family=cinematic-trailer / documentary-montage and props.cuts[] is present without props.scenes[], _remotion_render now transforms cuts→scenes in place (cumulative startSeconds, durationSeconds = out-in, in_seconds→trimBeforeSeconds, optional tone/filter/fade hints preserved). Without this, Cinematic compositions render 20–30s of pure black video with no error signal — scenes[] just defaults to []. 2. File-URI rewrite applied to scenes[] too The existing loop that converts absolute paths to file:// URIs only ran against cuts[]. After the adapter fires, scene sources need the same treatment. 3. Accept dict-form profile as well as string name `profile` is documented as a string key from media_profiles.py (e.g. "youtube_landscape"), but LLM callers frequently pass {"width": 1280, "height": 720, "fps": 30}. get_profile(name) then raises TypeError: unhashable type: 'dict' with no caller recourse. Accept both: dict → extract width/height directly; string → the existing get_profile lookup path. Note on the broader file-path limitation: Remotion's Chromium refuses `file:///` URLs for local-resource security ("Not allowed to load local resource"). Callers that pass absolute local paths still hit this — not regressed by this patch, just not solved. The long-term fix is serving local files over HTTP before invoking Remotion. In the meantime, callers with local files should front them with an HTTP server and pass the URL in cut.source. Co-Authored-By: Claude Opus 4.7 (1M context) --- tools/video/video_compose.py | 84 ++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/tools/video/video_compose.py b/tools/video/video_compose.py index 90d9959..00ca6e2 100644 --- a/tools/video/video_compose.py +++ b/tools/video/video_compose.py @@ -1306,15 +1306,68 @@ def _remotion_render(self, inputs: dict[str, Any]) -> ToolResult: # Deep-copy props so we don't mutate the original props = json.loads(json.dumps(composition_data)) + # Resolve the target composition early so we can remap props to the + # shape that composition expects. Without this, Explainer-shaped + # `cuts[]` data handed to CinematicRenderer (which reads `scenes[]`) + # silently renders pure black — the scenes prop defaults to []. + renderer_family = (composition_data or {}).get("renderer_family", "explainer-data") + composition_id = self._get_composition_id(renderer_family) + + # Adapter: the agent emits a single `cuts[]` shape across every + # renderer_family, but CinematicRenderer reads props.scenes[] with a + # different field layout. Translate cuts → scenes when we're + # targeting CinematicRenderer and the caller didn't already supply + # scenes. This keeps the edit-director's output schema uniform + # (edit_decisions.cuts) without hard-coding composition-specific + # shapes into the agent's skill prose. + if composition_id == "CinematicRenderer" and "scenes" not in props: + cursor = 0.0 + scenes: list[dict[str, Any]] = [] + for idx, cut in enumerate(props.get("cuts", [])): + in_s = float(cut.get("in_seconds") or cut.get("source_in_seconds") or 0) + out_s = cut.get("out_seconds") + if out_s is None: + duration = float(cut.get("duration") or cut.get("duration_s") or 0) + else: + duration = max(float(out_s) - in_s, 0.0) + scene: dict[str, Any] = { + "id": cut.get("id") or f"scene-{idx}", + "kind": "video", + "src": cut.get("source") or cut.get("src") or "", + "startSeconds": round(cursor, 3), + "durationSeconds": round(duration, 3) if duration > 0 else 0, + } + if in_s > 0: + scene["trimBeforeSeconds"] = in_s + for hint_src, hint_dst in ( + ("tone", "tone"), + ("filter", "filter"), + ("fade_in_frames", "fadeInFrames"), + ("fade_out_frames", "fadeOutFrames"), + ): + if hint_src in cut: + scene[hint_dst] = cut[hint_src] + scenes.append(scene) + cursor += float(scene["durationSeconds"] or 0) + props["scenes"] = scenes + # Convert absolute file paths to file:// URIs for Remotion's - # Img and OffthreadVideo components - for cut in props.get("cuts", []): - source = cut.get("source", "") + # Img and OffthreadVideo components. Apply to whichever prop shape + # the composition actually consumes. + def _resolve_path_to_uri(obj: dict[str, Any], field: str) -> None: + source = obj.get(field, "") if source and not source.startswith(("http://", "https://", "file://")): resolved = Path(source).resolve() if resolved.exists(): posix = resolved.as_posix() - cut["source"] = f"file:///{posix}" if not posix.startswith("/") else f"file://{posix}" + obj[field] = ( + f"file:///{posix}" if not posix.startswith("/") else f"file://{posix}" + ) + + for cut in props.get("cuts", []): + _resolve_path_to_uri(cut, "source") + for scene in props.get("scenes", []): + _resolve_path_to_uri(scene, "src") # Build a custom themeConfig from the playbook's actual colors. # This ensures every video gets a unique visual identity derived @@ -1342,11 +1395,8 @@ def _remotion_render(self, inputs: dict[str, Any]) -> ToolResult: error=f"Remotion composer project not found at {composer_dir}", ) - # Route to the correct Remotion composition based on renderer_family. - # This prevents all pipelines from collapsing into the Explainer visual grammar. - renderer_family = (composition_data or {}).get("renderer_family", "explainer-data") - composition_id = self._get_composition_id(renderer_family) - + # composition_id is already resolved above (pre-props-rewrite) so the + # scenes/cuts adapter could target the right shape. Reuse it here. cmd = [ "npx", "remotion", "render", str(composer_dir / "src" / "index.tsx"), @@ -1355,12 +1405,20 @@ def _remotion_render(self, inputs: dict[str, Any]) -> ToolResult: "--props", str(props_path), ] - # Apply media profile dimensions - profile_name = inputs.get("profile") - if profile_name: + # Apply media profile dimensions. `profile` is documented as a string + # name (e.g. "youtube_landscape"), but LLM-driven callers sometimes + # pass a dict like {"width": 1280, "height": 720, "fps": 30}. Accept + # both rather than erroring with TypeError: unhashable type: 'dict'. + profile_input = inputs.get("profile") + if isinstance(profile_input, dict): + w = profile_input.get("width") + h = profile_input.get("height") + if w and h: + cmd.extend(["--width", str(w), "--height", str(h)]) + elif isinstance(profile_input, str) and profile_input: try: from lib.media_profiles import get_profile - p = get_profile(profile_name) + p = get_profile(profile_input) cmd.extend(["--width", str(p.width), "--height", str(p.height)]) except (ImportError, ValueError): pass