diff --git a/paperbanana/studio/app.py b/paperbanana/studio/app.py index 13bf7372..db9dbd10 100644 --- a/paperbanana/studio/app.py +++ b/paperbanana/studio/app.py @@ -794,14 +794,36 @@ def _do_composite( rb_inp = gr.Textbox(label="run_input.json (preview)", lines=10) rb_gal = gr.Gallery(label="Iteration thumbnails", columns=4, height=220) bb_report = gr.Textbox(label="batch_report.json (preview)", lines=14) + gr.Markdown("### Run Compare") + with gr.Row(): + cmp_left = gr.Dropdown( + label="Left run", + choices=[], + allow_custom_value=True, + ) + cmp_right = gr.Dropdown( + label="Right run", + choices=[], + allow_custom_value=True, + ) + cmp_go = gr.Button("Compare runs") + with gr.Row(): + cmp_left_img = gr.Image(label="Left final output", type="filepath") + cmp_right_img = gr.Image(label="Right final output", type="filepath") + cmp_diff = gr.Markdown() + cmp_left_details = gr.Textbox(label="Left run details", lines=12) + cmp_right_details = gr.Textbox(label="Right run details", lines=12) def _refresh(od: str): root = (od or default_output_dir).strip() or default_output_dir r = runs_mod.list_run_ids(root) b = runs_mod.list_batch_ids(root) + left_default = r[-2] if len(r) >= 2 else (r[-1] if r else None) return ( gr.update(choices=r, value=r[-1] if r else None), gr.update(choices=b, value=b[-1] if b else None), + gr.update(choices=r, value=left_default), + gr.update(choices=r, value=r[-1] if r else None), ) def _show_run(od: str, rid: Optional[str]): @@ -822,10 +844,63 @@ def _show_batch(od: str, bid: Optional[str]): s = runs_mod.load_batch_summary(root, bid) return s.get("report_preview") or "" + def _render_compare_details(data: dict[str, Any]) -> str: + keys = [ + "run_id", + "diagram_type", + "caption", + "aspect_ratio", + "vlm_provider", + "vlm_model", + "image_provider", + "image_model", + "output_format", + "refinement_iterations", + "auto_refine", + "max_iterations", + "seed", + "duration_seconds", + "total_cost_usd", + ] + lines: list[str] = [] + for key 
in keys: + lines.append(f"{key}: {data.get(key)}") + return "\n".join(lines) + + def _show_compare(od: str, left_id: Optional[str], right_id: Optional[str]): + if not left_id or not right_id: + msg = "Select both runs to compare." + return None, None, msg, "", "" + root = (od or default_output_dir).strip() or default_output_dir + cmp = runs_mod.compare_runs(root, left_id, right_id) + if cmp.get("error"): + msg = str(cmp["error"]) + return None, None, msg, "", "" + left = cmp.get("left") or {} + right = cmp.get("right") or {} + diffs = cmp.get("diffs") or [] + if not diffs: + diff_md = "No differences detected in tracked comparison fields." + else: + rows = ["### Differences", ""] + for d in diffs: + field = d.get("field") + left_val = d.get("left") + right_val = d.get("right") + rows.append(f"- `{field}`: left=`{left_val}` | right=`{right_val}`") + diff_md = "\n".join(rows) + return ( + left.get("final_image"), + right.get("final_image"), + diff_md, + _render_compare_details(left), + _render_compare_details(right), + ) + rb_refresh.click( _refresh, inputs=[out_dir], - outputs=[run_pick, batch_pick], + outputs=[run_pick, batch_pick, cmp_left, cmp_right], ) run_pick.change( _show_run, @@ -837,6 +912,17 @@ def _show_batch(od: str, bid: Optional[str]): inputs=[out_dir, batch_pick], outputs=[bb_report], ) + cmp_go.click( + _show_compare, + inputs=[out_dir, cmp_left, cmp_right], + outputs=[ + cmp_left_img, + cmp_right_img, + cmp_diff, + cmp_left_details, + cmp_right_details, + ], + ) gr.Markdown( "---\n" diff --git a/paperbanana/studio/runs.py b/paperbanana/studio/runs.py index 0aabd48e..a40da5fc 100644 --- a/paperbanana/studio/runs.py +++ b/paperbanana/studio/runs.py @@ -7,6 +7,16 @@ from typing import Any, Optional +def _read_json_file(path: Path) -> Optional[dict[str, Any]]: + if not path.is_file(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return None + return data if isinstance(data, dict) 
else None + + def list_run_ids(output_dir: str) -> list[str]: """Return run directory names (``run_*``), oldest first.""" root = Path(output_dir) @@ -61,21 +71,19 @@ def load_run_summary(output_dir: str, run_id: str) -> dict[str, Any]: out["final_image"] = str(final.resolve()) meta_path = run_dir / "metadata.json" - if meta_path.is_file(): + meta_data = _read_json_file(meta_path) + if meta_path.is_file(): out["metadata_path"] = str(meta_path) - try: - data = json.loads(meta_path.read_text(encoding="utf-8")) - out["metadata_preview"] = json.dumps(data, indent=2)[:12000] - except (OSError, json.JSONDecodeError) as e: - out["metadata_preview"] = f"(could not read metadata: {e})" + out["metadata_preview"] = ( + json.dumps(meta_data, indent=2)[:12000] if meta_data is not None else "(could not read metadata)" + ) inp_path = run_dir / "run_input.json" - if inp_path.is_file(): - try: - raw = json.loads(inp_path.read_text(encoding="utf-8")) - out["run_input_preview"] = json.dumps(raw, indent=2)[:8000] - except (OSError, json.JSONDecodeError) as e: - out["run_input_preview"] = f"(could not read run_input: {e})" + inp_data = _read_json_file(inp_path) + if inp_data is not None: + out["run_input_preview"] = json.dumps(inp_data, indent=2)[:8000] + elif inp_path.is_file(): + out["run_input_preview"] = "(could not read run_input)" def _iter_sort_key(d: Path) -> int: parts = d.name.split("_", 1) @@ -98,6 +106,74 @@ def _iter_sort_key(d: Path) -> int: return out +def _collect_compare_fields(run_dir: Path) -> dict[str, Any]: + meta = _read_json_file(run_dir / "metadata.json") or {} + run_input = _read_json_file(run_dir / "run_input.json") or {} + final_image = _find_final_image(run_dir) + + settings = meta.get("settings") + if not isinstance(settings, dict): + settings = {} + + return { + "run_id": run_dir.name, + "run_dir": str(run_dir.resolve()), + "final_image": str(final_image.resolve()) if final_image else None, + "caption":
run_input.get("communicative_intent"), + "diagram_type": run_input.get("diagram_type"), + "aspect_ratio": run_input.get("aspect_ratio"), + "vlm_provider": settings.get("vlm_provider"), + "vlm_model": settings.get("vlm_model"), + "image_provider": settings.get("image_provider"), + "image_model": settings.get("image_model"), + "output_format": settings.get("output_format"), + "refinement_iterations": settings.get("refinement_iterations"), + "auto_refine": settings.get("auto_refine"), + "max_iterations": settings.get("max_iterations"), + "seed": settings.get("seed"), + "created_at": meta.get("created_at") or meta.get("timestamp"), + "duration_seconds": meta.get("duration_seconds"), + "total_cost_usd": meta.get("total_cost_usd") or meta.get("cost_usd"), + } + + +def compare_runs(output_dir: str, left_run_id: str, right_run_id: str) -> dict[str, Any]: + """Return side-by-side run metadata and a compact diff summary.""" + root = Path(output_dir) + left_dir = root / left_run_id + right_dir = root / right_run_id + if not left_dir.is_dir(): + return {"error": f"Run directory not found: {left_run_id}"} + if not right_dir.is_dir(): + return {"error": f"Run directory not found: {right_run_id}"} + + left = _collect_compare_fields(left_dir) + right = _collect_compare_fields(right_dir) + + keys = [ + "caption", + "diagram_type", + "aspect_ratio", + "vlm_provider", + "vlm_model", + "image_provider", + "image_model", + "output_format", + "refinement_iterations", + "auto_refine", + "max_iterations", + "seed", + "duration_seconds", + "total_cost_usd", + ] + diffs: list[dict[str, Any]] = [] + for key in keys: + if left.get(key) != right.get(key): + diffs.append({"field": key, "left": left.get(key), "right": right.get(key)}) + + return {"left": left, "right": right, "diffs": diffs} + + def load_batch_summary(output_dir: str, batch_id: str) -> dict[str, Any]: """Load batch_report.json summary if present.""" batch_dir = Path(output_dir) / batch_id