#!/usr/bin/env python3
"""uprez_ab.py — Pre-ship A/B harness for Phase 10 frame uprez.

SYNTHESIS.md §Pre-Ship Condition mandates this sweep before Phase 10 ships
to any production workflow. Runs 3 content styles × 2 engines on caller-
supplied sample frames, collects validation results, and writes a Markdown
report to consultations/recoil/frame-uprezzing-for-retry-path/empirical_results.md.

Budget:
  3 samples × 2 engines × (uprez + validation)
  ≈ 3 × ((0.039 + 0.134) + 2 × 0.003) = ~$0.537 on uprez + validation.
  SYNTHESIS estimates ~$1.03 inclusive of reruns. Cap at $2.00 via sample count.

Usage:
    python3 pipeline/tools/empirical/uprez_ab.py \
      --cartoon path/to/cartoon_sample.png \
      --photoreal path/to/photoreal_sample.jpg \
      --stylized path/to/stylized_sample.png \
      --aspect-ratio 16:9 \
      --out consultations/recoil/frame-uprezzing-for-retry-path/empirical_results.md

The `--stylized` sample is optional per SYNTHESIS ("if available"). If omitted
(or if the given path does not exist), the harness runs 2-style × 2-engine =
4 calls (~$0.36 including validation, by the Budget formula above).
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path
from typing import Optional

# Bootstrap so the project import below resolves when this file is executed
# directly as a script: an ancestor directory of this file is placed at the
# front of sys.path (only if it is not already listed there).
# parents[2] is two levels above the directory that contains this file.
# NOTE(review): `from recoil.pipeline...` needs the directory that *contains*
# the `recoil` package on sys.path — confirm parents[2] is that directory for
# this file's actual location in the repo.
_RECOIL_ROOT = Path(__file__).resolve().parents[2]
if str(_RECOIL_ROOT) not in sys.path:
    sys.path.insert(0, str(_RECOIL_ROOT))

# Must come after the sys.path tweak above, hence the E402 suppression.
from recoil.pipeline._lib import frame_uprez as U  # noqa: E402  # type: ignore


# Maps each sample flag name (--cartoon / --photoreal / --stylized, without the
# leading dashes) to the style identifier used for that sample. The values
# match the style strings that main() pairs with each sample before the style
# is forwarded to the uprez call.
STYLE_FOR_FLAG = {
    "cartoon": "cartoon_2d",
    "photoreal": "photorealistic",
    "stylized": "stylized",
}


def run_one(source: Path, style: str, engine: str, aspect_ratio: str) -> dict:
    """Uprez one sample with one engine and return a flat result record.

    Calls ``U.uprez_frame`` with validation switched on, then combines the
    request parameters with selected fields of the response.

    Args:
        source: Path of the sample frame to uprez.
        style: Style identifier forwarded to the uprez call.
        engine: Engine identifier forwarded to the uprez call.
        aspect_ratio: Aspect ratio string forwarded to the uprez call.

    Returns:
        A dict with the keys ``source`` (as a string), ``style``, ``engine``,
        ``aspect_ratio``, ``success`` (``False`` when the response has no such
        key), ``error``, ``engine_used``, ``cost`` and ``validation`` (each
        ``None`` when missing from the response).
    """
    outcome = U.uprez_frame(
        source=source,
        aspect_ratio=aspect_ratio,
        engine=engine,
        style=style,
        validate=True,
    )

    # Echo the request first so every record is self-describing in the report.
    record: dict = {
        "source": str(source),
        "style": style,
        "engine": engine,
        "aspect_ratio": aspect_ratio,
        "success": outcome.get("success", False),
    }
    # Remaining fields are copied through verbatim; absent ones become None.
    for field in ("error", "engine_used", "cost", "validation"):
        record[field] = outcome.get(field)
    return record


def format_report(results: list[dict]) -> str:
    """Render the sweep results as a Markdown report.

    The report contains, in order: a summary table (one row per result), the
    total cost, an "Errors" section (only when at least one result carries a
    non-empty ``error``), the per-axis rubric outcomes for every result whose
    rubric did not pass on all axes, and the fixed decision-rule text.

    Args:
        results: Result records shaped like the ones ``run_one`` returns. Each
            must have ``source``, ``style``, ``engine`` and ``success``;
            ``cost``, ``error`` and ``validation`` may be missing or ``None``.

    Returns:
        The Markdown document as a single string ending with a newline.
    """
    lines = [
        "# Frame Uprez — Pre-Ship A/B Results",
        "",
        "Sweep per SYNTHESIS §Pre-Ship Condition (3-style × 2-engine on sample frames).",
        "",
        "| Sample | Style | Engine | Success | Histogram | Rubric all_yes | Cost |",
        "|---|---|---|---|---|---|---|",
    ]
    total_cost = 0.0
    error_lines = []
    for r in results:
        sample = f"`{Path(r['source']).name}`"
        # A missing/None validation (or sub-dict) collapses to {} so the
        # chained lookups below simply yield None.
        v = r.get("validation") or {}
        hist = (v.get("histogram") or {}).get("correlation")
        rub = (v.get("rubric") or {}).get("all_yes")
        cost = r.get("cost") or 0.0
        total_cost += cost
        lines.append(
            f"| {sample} | {r['style']} | {r['engine']} | "
            f"{'PASS' if r['success'] else 'FAIL'} | "
            f"{hist if hist is not None else '-'} | "
            f"{rub if rub is not None else '-'} | "
            f"${cost:.4f} |"
        )
        err = r.get("error")
        if err:
            # Collapse whitespace so a multi-line message stays one bullet.
            message = " ".join(str(err).split())
            error_lines.append(f"- {sample} · {r['style']} · {r['engine']}: {message}")
    lines.append("")
    lines.append(f"**Total cost:** ${total_cost:.4f}")
    lines.append("")
    # Without this section a FAIL row gives no hint why the call failed.
    # It is omitted entirely when no result reported an error.
    if error_lines:
        lines.append("## Errors")
        lines.append("")
        lines.extend(error_lines)
        lines.append("")
    lines.append("## Per-axis reasons (Layer 2 only)")
    for r in results:
        v = r.get("validation") or {}
        rub = v.get("rubric") or {}
        # Only rubrics that exist and did not pass on every axis are expanded.
        if not rub or rub.get("all_yes"):
            continue
        axes = rub.get("axes") or {}
        if not axes:
            continue
        lines.append(f"### `{Path(r['source']).name}` · {r['style']} · {r['engine']}")
        for ax_name, ax in axes.items():
            mark = "YES" if ax.get("passed") else f"NO — {ax.get('reason') or ''}"
            lines.append(f"- {ax_name}: {mark}")
        lines.append("")
    lines.append("## Decision rule (SYNTHESIS §Disagreement A)")
    lines.append("")
    lines.append("- If default rule (NB2 for stylized, NBP for photoreal) survives: ship as-is.")
    lines.append("- If NB2 fails on photoreal content OR NBP applies 3D shading to 2D cartoon: adjust default table.")
    return "\n".join(lines) + "\n"


def main(argv: Optional[list[str]] = None) -> int:
    """Run the A/B sweep from the command line and write the Markdown report.

    Every sample is run through both engines ("nb2" and "nbp") via
    ``run_one``; the collected records are rendered with ``format_report`` and
    written to ``--out`` (parent directories are created as needed).

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Always 0. Per-call failures are visible in the report, not in the
        exit status.
    """
    p = argparse.ArgumentParser(prog="uprez_ab.py")
    p.add_argument("--cartoon", required=True, type=Path, help="2D cartoon sample")
    p.add_argument("--photoreal", required=True, type=Path, help="Photorealistic sample")
    p.add_argument("--stylized", type=Path, default=None, help="Optional stylized/anime sample")
    p.add_argument("--aspect-ratio", default="16:9")
    p.add_argument("--out", type=Path, default=Path(_RECOIL_ROOT).parent / "consultations" / "recoil" /
                   "frame-uprezzing-for-retry-path" / "empirical_results.md")
    args = p.parse_args(argv)

    # Style identifiers come from STYLE_FOR_FLAG so the flag -> style mapping
    # lives in exactly one place.
    samples: list[tuple[Path, str]] = [
        (args.cartoon, STYLE_FOR_FLAG["cartoon"]),
        (args.photoreal, STYLE_FOR_FLAG["photoreal"]),
    ]
    if args.stylized is not None:
        if args.stylized.exists():
            samples.append((args.stylized, STYLE_FOR_FLAG["stylized"]))
        else:
            # The stylized sample stays optional, but a path that was given and
            # is missing is most likely a typo: say so instead of silently
            # shrinking the sweep to two styles.
            print(
                f"[uprez-ab] WARNING: --stylized sample not found, skipping: {args.stylized}",
                file=sys.stderr,
            )

    results: list[dict] = []
    for src, style in samples:
        for engine in ("nb2", "nbp"):
            print(f"[uprez-ab] {src.name} · style={style} · engine={engine}")
            results.append(run_one(src, style, engine, args.aspect_ratio))

    args.out.parent.mkdir(parents=True, exist_ok=True)
    args.out.write_text(format_report(results), encoding="utf-8")
    print(f"[uprez-ab] wrote {args.out}")
    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    sys.exit(main())
