#!/usr/bin/env python3
"""Seedance JSON Prompt Structure A/B Test.

Tests whether JSON-structured prompts produce better results than prose
prompts for Seedance 2.0. If JSON keys render as text overlays, abandon
immediately (GO/NO-GO gate).

Three test cases cover the range of shot complexity:
  1. Simple scene — single character, minimal action
  2. Action scene — fast motion, tracking camera
  3. Detailed environment — wide establishing, rich spatial detail

Usage:
    # Dry run — print prompts and GO/NO-GO criteria (free)
    python3 tools/seedance_json_ab_test.py --project tartarus --dry-run

    # Generate and compare ($$$)
    python3 tools/seedance_json_ab_test.py --project tartarus

    # Custom output directory
    python3 tools/seedance_json_ab_test.py --project tartarus \\
        --output-dir pipeline/output/ab_tests/json_custom
"""

import argparse
import json
import random
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

# Directory two levels above this file. It is pushed to the front of sys.path
# before the `recoil` import below, presumably so that package resolves when
# this file is executed directly as a script — TODO confirm the layout.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))

from recoil.pipeline.core.cost import read_cost_from_result, read_cost_from_record_safe  # noqa: E402

# ──────────────────────────────────────────────────────────────────────
# Test Cases — same content in prose vs JSON format
# ──────────────────────────────────────────────────────────────────────

# Each entry describes one shot rendered twice:
#   name        — identifier used to build the shot_id and group results
#   description — human-readable summary shown in the dry run and the HTML
#   prose       — the prompt written as flowing sentences
#   json        — the same content serialized as a JSON object string
#   duration    — clip length in seconds passed to the generator
# The two prompt variants must carry the SAME content; only the structure
# may differ, otherwise the A/B comparison is confounded.
TEST_CASES = [
    {
        "name": "simple_scene",
        "description": "Single character, minimal action, static camera",
        "prose": (
            "A young woman with dark shoulder-length hair stands at a rain-slicked "
            "rooftop edge at night, neon signs reflected in puddles around her boots. "
            "Her expression is calm and resolute. She turns slowly to face the camera. "
            "Static medium shot, 50mm lens. Shot on Kodak Vision3 500T, motivated "
            "lighting from neon signage. 4K, Ultra HD, rich details, sharp clarity, "
            "cinematic texture, natural colors, stable picture. No music, no score."
        ),
        "json": json.dumps({
            "subject": (
                "A young woman with dark shoulder-length hair stands at a rain-slicked "
                "rooftop edge at night, neon signs reflected in puddles around her boots"
            ),
            "emotion": "Calm and resolute expression",
            "action": "She turns slowly to face the camera",
            "camera": "Static medium shot, 50mm lens",
            "style": "Shot on Kodak Vision3 500T, motivated lighting from neon signage",
            "quality": (
                "4K, Ultra HD, rich details, sharp clarity, cinematic texture, "
                "natural colors, stable picture"
            ),
            "audio": "No music, no score",
        }),
        "duration": 5,
    },
    {
        "name": "action_scene",
        "description": "Fast motion, tracking camera, multiple environmental details",
        "prose": (
            "Kira Tanaka sprints across a rain-slicked concrete rooftop, arms pumping, "
            "tactical jacket billowing behind her. Neon signs flicker below, puddles "
            "splash underfoot. Her face is locked in fierce concentration, teeth bared "
            "with adrenaline. She vaults over a low concrete barrier and tucks into a "
            "roll. Tracking medium-long shot on 24mm lens. Shot on Kodak Vision3 500T, "
            "hard light from neon signs above, soft ambient city glow. Motivated "
            "lighting, practical light sources visible in frame. 4K, Ultra HD, rich "
            "details, sharp clarity, cinematic texture, natural colors, stable picture. "
            "No music, no score."
        ),
        "json": json.dumps({
            "subject": (
                "Kira Tanaka sprints across a rain-slicked concrete rooftop, arms pumping, "
                "tactical jacket billowing behind her. Neon signs flicker below, puddles "
                "splash underfoot"
            ),
            "emotion": "Fierce concentration, teeth bared with adrenaline",
            "action": "She vaults over a low concrete barrier and tucks into a roll",
            "camera": "Tracking medium-long shot on 24mm lens",
            "style": (
                "Shot on Kodak Vision3 500T, hard light from neon signs above, "
                "soft ambient city glow. Motivated lighting, practical light sources "
                "visible in frame"
            ),
            "quality": (
                "4K, Ultra HD, rich details, sharp clarity, cinematic texture, "
                "natural colors, stable picture"
            ),
            "audio": "No music, no score",
        }),
        "duration": 5,
    },
    {
        "name": "detailed_environment",
        "description": "Wide establishing shot, no character, rich spatial detail",
        "prose": (
            "A glass-walled corner office at the apex of a skyscraper stretches out in "
            "cold minimalist splendor. Floor-to-ceiling windows reveal a glittering "
            "cityscape panorama spread to the horizon. A single halogen desk lamp casts "
            "a cone of warm light on an empty leather chair. The stillness suggests "
            "absence. The desk lamp flickers once, casting brief shadows across the "
            "polished floor, then steadies. Slow crane shot on 16mm wide-angle lens. "
            "Shot on Kodak Vision3 500T, warm tungsten bounce from desk lamp, soft city "
            "glow from behind. Volumetric dust particles in the lamp beam. 4K, Ultra "
            "HD, rich details, sharp clarity, cinematic texture, natural colors, stable "
            "picture. No music, no score."
        ),
        "json": json.dumps({
            # Keeps "spread to the horizon" so the subject matches the prose
            # variant detail-for-detail.
            "subject": (
                "A glass-walled corner office at the apex of a skyscraper stretches out "
                "in cold minimalist splendor. Floor-to-ceiling windows reveal a glittering "
                "cityscape panorama spread to the horizon. A single halogen desk lamp "
                "casts a cone of warm light on an empty leather chair"
            ),
            "emotion": "The stillness suggests absence",
            "action": (
                "The desk lamp flickers once, casting brief shadows across the polished "
                "floor, then steadies"
            ),
            "camera": "Slow crane shot on 16mm wide-angle lens",
            "style": (
                "Shot on Kodak Vision3 500T, warm tungsten bounce from desk lamp, "
                "soft city glow from behind. Volumetric dust particles in the lamp beam"
            ),
            "quality": (
                "4K, Ultra HD, rich details, sharp clarity, cinematic texture, "
                "natural colors, stable picture"
            ),
            "audio": "No music, no score",
        }),
        "duration": 7,
    },
]

# ──────────────────────────────────────────────────────────────────────
# GO/NO-GO criteria
# ──────────────────────────────────────────────────────────────────────

# Ordered checklist a reviewer applies to the generated clips. The order
# matters: the printed decision rule refers to criteria 1 and 2 by number.
GO_NO_GO_CRITERIA = [
    (
        "JSON keys (subject, action, camera, etc.) must NOT render as "
        "visible text overlays in the generated video."
    ),
    (
        "If ANY test case shows JSON key names burned into the video frame, "
        "the entire JSON approach is ABANDONED."
    ),
    (
        "Visual quality must be at least comparable to prose — no degradation "
        "in motion coherence, lighting, or composition."
    ),
    (
        "Both variants must complete generation without API errors."
    ),
]


def print_go_no_go():
    """Write the numbered GO/NO-GO checklist and its decision rule to stdout."""
    rule = "=" * 60
    numbered = [
        f"  {number}. {text}"
        for number, text in enumerate(GO_NO_GO_CRITERIA, start=1)
    ]
    header = [
        "",  # blank line separating the banner from preceding output
        rule,
        "GO/NO-GO CRITERIA — JSON Prompt Structure",
        rule,
    ]
    footer = [
        "",
        "DECISION RULE: If criterion 1 or 2 fails on ANY test case,",
        "the JSON prompt approach is abandoned immediately. Do not",
        "proceed to a full A/B test.",
        rule,
    ]
    for line in header + numbered + footer:
        print(line)


# ──────────────────────────────────────────────────────────────────────
# Dry-run mode
# ──────────────────────────────────────────────────────────────────────

def dry_run(output_dir: Path):
    """Print every test case, the GO/NO-GO criteria and a cost estimate.

    Nothing is generated and nothing is written to disk; *output_dir* is only
    echoed so the user can see where a real run would put its files.

    Args:
        output_dir: Directory a real run would write into.
    """

    def preview(text: str, limit: int) -> str:
        # Only mark the text as cut when it really was cut.
        return text if len(text) <= limit else f"{text[:limit]}..."

    rule = "=" * 60
    print("\n" + rule)
    print("Seedance JSON Prompt A/B Test — DRY RUN")
    print(rule)

    for case in TEST_CASES:
        print(f"\n--- {case['name']}: {case['description']} ---")
        print(f"  Duration: {case['duration']}s")
        print(f"\n  PROSE ({len(case['prose'].split())} words):")
        print(f"    {preview(case['prose'], 200)}")
        # Counts whitespace-separated chunks of the serialized JSON string.
        print(f"\n  JSON ({len(case['json'].split())} tokens):")
        # Show the JSON one key per line for readability
        parsed = json.loads(case["json"])
        for key, val in parsed.items():
            print(f"    {key}: {preview(str(val), 80)}")
        print()

    print_go_no_go()

    # Estimate from the real per-case durations rather than a flat 5s: the
    # cases are not all the same length, so a flat figure underestimates.
    # 0.3034 is presumably USD per generated second — TODO confirm pricing.
    rate_per_second = 0.3034
    variants = 2
    total_seconds = variants * sum(case["duration"] for case in TEST_CASES)

    print(f"\nOutput directory (when run for real): {output_dir}")
    print(f"Estimated cost: ${total_seconds * rate_per_second:.2f} "
          f"({len(TEST_CASES)} cases x {variants} variants, "
          f"{total_seconds}s total)")


# ──────────────────────────────────────────────────────────────────────
# Generation mode
# ──────────────────────────────────────────────────────────────────────

def run_generation(project: str, output_dir: Path):
    """Generate the prose and JSON variant of every test case and report.

    Each prompt is submitted through ``dispatch("video_i2v", ...)`` with a
    ``DispatchContext`` carrying a ``StepRunner`` that is backed by an
    ``ExecutionStore`` for *project*. (An earlier note on this function said
    this path makes results appear in Dailies; that cannot be verified from
    this file.)

    A failure of one generation — whether reported by the result object or
    raised as an exception — is recorded and does not stop the remaining
    generations.

    Side effects: creates *output_dir*, writes ``json_test_results.json`` and
    the comparison HTML into it, and prints progress, a summary and the
    GO/NO-GO criteria to stdout.

    Args:
        project: Project name passed to ExecutionStore, ProjectPaths and
            DispatchContext.
        output_dir: Directory that receives the results JSON and the HTML.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Imported here rather than at module level, so the dry-run path never
    # needs these modules.
    from recoil.execution.execution_store import ExecutionStore
    from recoil.execution.step_runner import StepRunner
    from recoil.execution.step_types import ProjectPaths
    from recoil.pipeline.core.dispatch import dispatch
    from recoil.pipeline.core.dispatch_context import DispatchContext

    # Use episode 999 to avoid colliding with real production data
    test_episode = 999

    store = ExecutionStore(project)
    paths = ProjectPaths.for_episode(project, test_episode)
    runner = StepRunner(store=store, paths=paths, episode=test_episode)

    ctx = DispatchContext(
        caller_id="seedance_json_ab",
        step_runner=runner,
        project=project,
        episode=test_episode,
    )

    # Estimate from the real per-case durations rather than a flat 5s.
    # 0.3034 is presumably USD per generated second — TODO confirm pricing.
    rate_per_second = 0.3034
    total_seconds = 2 * sum(case["duration"] for case in TEST_CASES)

    print(f"\nGenerating {len(TEST_CASES)} test cases x 2 variants (prose + JSON)")
    print(f"Estimated cost: ${total_seconds * rate_per_second:.2f} "
          f"({total_seconds}s total)")
    print()

    results = []
    for case in TEST_CASES:
        name = case["name"]
        duration = case["duration"]

        for variant, prompt in [("prose", case["prose"]),
                                ("json", case["json"])]:
            shot_id = f"JSON_AB_{name}_{variant}".upper()
            print(f"  [{shot_id}] Generating ({duration}s)...",
                  end=" ", flush=True)
            start = time.time()

            # Fields every record carries, whatever the outcome.
            base = {
                "case": name,
                "variant": variant,
                "shot_id": shot_id,
                "prompt": prompt,
                "prompt_word_count": len(prompt.split()),
                "duration": duration,
            }

            try:
                receipt = dispatch(
                    "video_i2v",
                    {
                        "shot_id": shot_id,
                        "prompt": prompt,
                        # NOTE(review): spelled "seeddance" (double d) while
                        # the docs say "Seedance" — confirm this matches the
                        # registered model id.
                        "model": "seeddance-2.0",
                        "duration": duration,
                        "aspect_ratio": "9:16",
                        "generate_audio": False,
                    },
                    context=ctx,
                )
                result = receipt.run_result
                latency = time.time() - start
                success = result.success
                cost = read_cost_from_result(result)
                entry = {
                    **base,
                    "success": success,
                    "output_path": str(result.output_path or ""),
                    "latency_s": round(latency, 1),
                    "cost_usd": cost,
                }
                if success:
                    print(f"OK ({latency:.0f}s, ${cost:.4f})")
                else:
                    error = getattr(result, "error", "unknown")
                    print(f"FAIL: {error}")
                    # Keep the reason in the saved results, not just on stdout.
                    entry["error"] = str(error)
            except Exception as e:
                # Deliberately broad: one failed shot must not abort the
                # remaining (paid) generations; the failure is recorded.
                latency = time.time() - start
                print(f"FAILED: {e}")
                entry = {
                    **base,
                    "success": False,
                    "output_path": "",
                    "error": str(e),
                    "latency_s": round(latency, 1),
                    "cost_usd": 0.0,
                }
            results.append(entry)

    # Save results
    results_file = output_dir / "json_test_results.json"
    with open(results_file, "w", encoding="utf-8") as f:
        json.dump({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "project": project,
            "test_cases": len(TEST_CASES),
            "go_no_go_criteria": GO_NO_GO_CRITERIA,
            "results": results,
        }, f, indent=2)
    print(f"\nResults saved: {results_file}")

    # Generate comparison HTML
    _write_comparison_html(results, output_dir)

    # Print summary
    total_cost = sum(read_cost_from_record_safe(r) for r in results)
    successes = sum(1 for r in results if r.get("success"))
    print(f"\n{successes}/{len(results)} generations succeeded, "
          f"${total_cost:.4f} total")

    print_go_no_go()


def _write_comparison_html(results: list[dict], output_dir: Path):
    """Write a side-by-side review page for the generated clips.

    For every entry in TEST_CASES the page shows the two variants as
    "Variant A" / "Variant B" in random order, with the real label hidden
    until the reviewer clicks it. (The label is still present in the markup
    as a ``data-lang`` attribute, so the page is blind only visually.) A case
    where either variant is missing or unsuccessful gets a failure notice
    instead of videos.

    Args:
        results: Result records; each needs ``case`` and ``variant``, and
            successful ones need ``output_path``.
        output_dir: Directory that receives ``json_ab_comparison.html``.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    html_path = output_dir / "json_ab_comparison.html"

    # Group results by test case: {case name: {variant: record}}
    by_case = {}
    for r in results:
        by_case.setdefault(r["case"], {})[r["variant"]] = r

    def variant_markup(letter: str, path: str, label: str) -> str:
        # NOTE(review): the path is used verbatim as the video src, so a
        # relative output_path resolves against the HTML file's directory.
        return (
            f'<div class="variant"><h4>Variant {letter}</h4>'
            '<video controls width="360">'
            f'<source src="{escape(str(path), quote=True)}" '
            'type="video/mp4"></video>'
            f'<p class="reveal" data-lang="{escape(label, quote=True)}">'
            'Click to reveal</p></div>'
        )

    rows = []
    for case in TEST_CASES:
        name = case["name"]
        heading = escape(f'{name}: {case["description"]}')
        case_results = by_case.get(name, {})
        prose_r = case_results.get("prose", {})
        json_r = case_results.get("json", {})

        if not prose_r.get("success") or not json_r.get("success"):
            rows.append(
                '<div class="comparison failed">'
                f'<h3>{heading}</h3>'
                '<p class="error">One or both variants failed to generate.</p>'
                '</div>'
            )
            continue

        # Randomize A/B order for blind review
        if random.random() > 0.5:
            a_path, b_path = prose_r["output_path"], json_r["output_path"]
            a_label, b_label = "prose", "json"
        else:
            a_path, b_path = json_r["output_path"], prose_r["output_path"]
            a_label, b_label = "json", "prose"

        rows.append(
            '<div class="comparison">'
            f'<h3>{heading}</h3>'
            '<div class="videos">'
            + variant_markup("A", a_path, a_label)
            + variant_markup("B", b_path, b_label)
            + '</div>'
            '<div class="go-no-go">'
            '<h4>GO/NO-GO Check</h4>'
            '<label><input type="checkbox" class="check-overlay"> '
            'JSON keys visible as text overlays?</label><br>'
            '<label><input type="checkbox" class="check-quality"> '
            'Quality at least comparable to prose?</label>'
            '</div></div>'
        )

    criteria_items = ''.join(
        f'<li>{escape(c)}</li>' for c in GO_NO_GO_CRITERIA
    )

    document = ''.join([
        '<!DOCTYPE html><html><head>'
        # Declare the encoding: the page contains non-ASCII punctuation and
        # is written as UTF-8 below.
        '<meta charset="utf-8">'
        '<title>Seedance JSON Structure A/B Test</title>'
        '<style>'
        'body{font-family:-apple-system,sans-serif;margin:2em;'
        'background:#1a1a2e;color:#e0e0e0}'
        '.comparison{margin:2em 0;padding:1em;border:1px solid #333;'
        'border-radius:8px}'
        '.comparison.failed{border-color:#f44;opacity:0.6}'
        '.error{color:#f44}'
        '.videos{display:flex;gap:2em}'
        '.variant{flex:1}'
        '.reveal{cursor:pointer;color:#888;font-style:italic}'
        '.reveal.shown{color:#4fc3f7;font-weight:bold}'
        '.go-no-go{margin-top:1em;padding:0.8em;background:#222;'
        'border-radius:4px}'
        '.go-no-go label{display:block;margin:0.3em 0;cursor:pointer}'
        '</style>'
        '<script>'
        "document.addEventListener('click',e=>{"
        "if(e.target.classList.contains('reveal')){"
        "e.target.textContent=e.target.dataset.lang.toUpperCase();"
        "e.target.classList.add('shown')}});"
        '</script></head><body>'
        '<h1>Seedance 2.0 — JSON Prompt Structure A/B Test</h1>',
        f'<p>Generated: {datetime.now(timezone.utc).isoformat()}</p>',
        '<h2>GO/NO-GO Criteria</h2><ol>',
        criteria_items,
        '</ol>',
        f'<p>{len(rows)} test case comparisons. Click "Click to reveal" '
        'after rating to see which is prose/json.</p>',
        '\n'.join(rows),
        '</body></html>',
    ])

    # Explicit UTF-8 so the bytes on disk match the declared charset on
    # every platform.
    with open(html_path, "w", encoding="utf-8") as f:
        f.write(document)
    print(f"Comparison HTML: {html_path}")


# ──────────────────────────────────────────────────────────────────────
# CLI
# ──────────────────────────────────────────────────────────────────────

def main():
    """Parse the command line and run either the dry run or a real generation.

    ``--project`` is required. ``--dry-run`` prints the prompts and criteria
    without generating. ``--output-dir`` overrides the default output
    directory under PROJECT_ROOT.
    """
    default_output_dir = PROJECT_ROOT / "output" / "ab_tests" / "json_structure"

    parser = argparse.ArgumentParser(
        description="Seedance JSON prompt structure A/B test",
    )
    parser.add_argument("--project", required=True, help="Project name")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print prompts and GO/NO-GO criteria without generating",
    )
    # Show the default that is actually used, so the help text cannot drift
    # from the code. argparse %-formats help strings, hence the "%%" escape.
    default_for_help = str(default_output_dir).replace("%", "%%")
    parser.add_argument(
        "--output-dir",
        default=None,
        help=f"Output directory (default: {default_for_help})",
    )
    args = parser.parse_args()

    # An omitted (or empty) --output-dir falls back to the default.
    output_dir = Path(args.output_dir) if args.output_dir else default_output_dir

    if args.dry_run:
        dry_run(output_dir)
    else:
        run_generation(args.project, output_dir)


if __name__ == "__main__":
    main()