#!/usr/bin/env python3
"""
gemini_qc.py — Gemini Visual QC Pipeline

Deep visual quality control using Gemini's vision capabilities. Builds on
visual_gate.py (mechanical + basic semantic) with production-grade evaluation:

  - Keyframe QC: Detailed scoring against storyboard specs (identity, expression,
    composition, lighting, wardrobe, emotional register, technical quality)
  - Video QC: Motion coherence, identity drift, keyframe adherence, artifacts
  - Cross-episode regression: Character appearance drift across the series arc

Model strategy (calibrate down from quality):
  - Keyframe QC: gemini-3-pro-preview (establish ceiling, then test cheaper)
  - Video QC: gemini-3-pro-preview (unproven capability, start with best)
  - Regression: gemini-2.5-flash (high volume, structured scoring)

Usage:
    # Single keyframe QC
    python3 gemini_qc.py keyframe \\
        --image shot_03_first.png \\
        --storyboard storyboard_ep_001.json \\
        --shot-id 3

    # Batch keyframe QC for an episode
    python3 gemini_qc.py batch-keyframes \\
        --project leviathan --episode 1

    # Single video clip QC
    python3 gemini_qc.py video \\
        --clip shot_03.mp4 \\
        --storyboard storyboard_ep_001.json \\
        --shot-id 3

    # Batch video QC for an episode
    python3 gemini_qc.py batch-video \\
        --project leviathan --episode 1

    # Cross-episode character regression
    python3 gemini_qc.py regression \\
        --project leviathan \\
        --character jinx \\
        --episodes 1-10

    # Calibrate: compare Gemini scores to human judgment
    python3 gemini_qc.py calibrate \\
        --project leviathan --episode 1 \\
        --human-scores calibration_ep_001.json

Exit codes: 0 = all pass, 1 = some failures/edge cases, 2 = error

Requires:
    pip install google-generativeai Pillow
    export GOOGLE_API_KEY="your-key-here"
"""

# ╔════════════════════════════════════════════════════════════════════╗
# ║ DEPRECATED — Superseded by Starsend equivalents (Feb 2026).      ║
# ║ Kept alive for Recoil agent protocols + referencing scripts.     ║
# ║ Do NOT delete until agents/breakdown_agent.md, storyboard_agent, ║
# ║ engine_checks/structural.py, and batch_threepass.py are updated. ║
# ╚════════════════════════════════════════════════════════════════════╝

import argparse
import json
import os
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Tuple

from cost_tracker import CostTracker
from recoil.core.model_profiles import get_model


# ── Model Configuration ──────────────────────────────────────────────────

# Calibrate down from quality, not up from cheap.
# Start with the strongest model to establish a ceiling.
# Each default below is resolved once, at import time, via get_model(); the
# value is later passed as the model name to genai.GenerativeModel.
KEYFRAME_MODEL = get_model("keyframe", "qc")
VIDEO_MODEL = get_model("video", "qc")
REGRESSION_MODEL = get_model("regression", "qc")

# Override via env var for calibration testing.
# Precedence used by the run_* functions: explicit model_override argument,
# then GEMINI_QC_MODEL (this value), then the per-task default above.
MODEL_OVERRIDE = os.environ.get("GEMINI_QC_MODEL")


# ── Shared Helpers ────────────────────────────────────────────────────────

def get_gemini_model(model_name: str):
    """Return a configured ``genai.GenerativeModel`` for *model_name*.

    The SDK is imported lazily so the module can be imported without it.
    Exits the process with status 2 when the SDK is missing or when the
    GOOGLE_API_KEY environment variable is unset/empty.
    """
    try:
        import google.generativeai as genai

        key = os.getenv("GOOGLE_API_KEY")
        if key:
            genai.configure(api_key=key)
            return genai.GenerativeModel(model_name)

        # No usable key: report and abort (SystemExit is not an ImportError,
        # so it passes straight through the handler below).
        print("ERROR: GOOGLE_API_KEY not set", file=sys.stderr)
        sys.exit(2)
    except ImportError:
        print(
            "ERROR: google-generativeai not installed. Run: pip install google-generativeai",
            file=sys.stderr,
        )
        sys.exit(2)


def _strip_code_fences(text: str) -> str:
    """Remove a surrounding Markdown code fence (``` or ```json) from *text*."""
    text = text.strip()
    if not text.startswith("```"):
        return text

    body = text[3:]
    head, newline, tail = body.partition("\n")
    # The remainder of the opening fence line is normally just a language tag
    # ("json"); drop it unless the JSON payload itself starts on that line.
    if newline and not head.lstrip().startswith(("{", "[")):
        body = tail

    body = body.rstrip()
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


def call_gemini_vision(model, prompt: str, image_paths: List[str],
                       video_path: Optional[str] = None) -> Tuple[dict, dict]:
    """Send images/video + prompt to Gemini vision API and parse JSON response.

    Parts are sent in this order: every image in ``image_paths``, then the
    video (if any), then the text prompt.

    Args:
        model: Object exposing ``generate_content(parts)`` (e.g. the value
            returned by ``get_gemini_model``).
        prompt: Text prompt; the model is expected to answer with JSON.
        image_paths: Paths of images to attach (may be empty).
        video_path: Optional path of a video clip to attach inline. The MIME
            type is chosen from the file suffix, defaulting to video/mp4.

    Returns:
        (parsed_json, usage) where usage has 'tokens_in' and 'tokens_out'.

    Raises:
        json.JSONDecodeError: If the response text is not valid JSON after
            Markdown code fences are stripped.
        Exception: Anything raised while opening inputs or calling the model
            is propagated unchanged; callers treat it as a QC error.
    """
    parts = []
    opened_images = []

    try:
        # Pillow is only needed when there is at least one image to attach.
        if image_paths:
            import PIL.Image

            for path in image_paths:
                img = PIL.Image.open(path)
                opened_images.append(img)
                parts.append(img)

        # Add video if provided
        if video_path:
            import google.generativeai as genai

            clip = Path(video_path)
            mime_map = {".mp4": "video/mp4", ".webm": "video/webm", ".mov": "video/quicktime"}
            mime = mime_map.get(clip.suffix.lower(), "video/mp4")
            parts.append(genai.types.BlobDict(mime_type=mime, data=clip.read_bytes()))

        parts.append(prompt)

        response = model.generate_content(parts)
    finally:
        # Release the image file handles once the request has been made
        # (or has failed); they were previously left open.
        for img in opened_images:
            img.close()

    text = _strip_code_fences(response.text)

    # Extract token usage. Zeros are kept when the metadata is present but
    # empty; the rough estimate is used only when reading it fails.
    usage = {"tokens_in": 0, "tokens_out": 0}
    try:
        meta = response.usage_metadata
        if meta:
            usage["tokens_in"] = getattr(meta, "prompt_token_count", 0) or 0
            usage["tokens_out"] = getattr(meta, "candidates_token_count", 0) or 0
    except Exception:
        # Best-effort: cost tracking must never fail the QC call itself.
        usage["tokens_in"] = 250 + 260 * len(image_paths) + (5000 if video_path else 0)
        usage["tokens_out"] = 300

    return json.loads(text), usage


def find_project_root() -> Path:
    """Walk up from this file to find the Recoil project root.

    The root is the first directory, starting at this file's own directory
    and checking at most 10 levels, that contains both a ``tools`` and an
    ``editors`` subdirectory.

    Returns:
        The project root directory.

    Exits the process with status 2 if no such directory is found.
    """
    start = Path(__file__).resolve().parent
    for candidate in [start, *start.parents][:10]:
        if (candidate / "tools").is_dir() and (candidate / "editors").is_dir():
            return candidate
    print(
        "ERROR: Could not locate project root "
        "(no directory containing both tools/ and editors/ found).",
        file=sys.stderr,
    )
    sys.exit(2)


def load_storyboard(path: Path) -> dict:
    """Load and validate a storyboard JSON file.

    Args:
        path: Location of the storyboard file (read as UTF-8).

    Returns:
        The parsed storyboard, guaranteed to be a JSON object (dict).

    Exits the process with status 2 when the file is missing, unreadable,
    not valid JSON, or its top-level value is not an object.
    """
    if not path.is_file():
        print(f"ERROR: Storyboard not found: {path}", file=sys.stderr)
        sys.exit(2)

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"ERROR: Invalid JSON in {path}: {e}", file=sys.stderr)
        sys.exit(2)
    except (OSError, UnicodeDecodeError) as e:
        print(f"ERROR: Could not read {path}: {e}", file=sys.stderr)
        sys.exit(2)

    # Callers use dict access (.get) on the result, so reject anything else
    # here with a clear message instead of failing later.
    if not isinstance(data, dict):
        print(f"ERROR: Storyboard must be a JSON object: {path}", file=sys.stderr)
        sys.exit(2)
    return data


def load_lora_registry(project_dir: Path) -> dict:
    """Load the LoRA registry for character reference data.

    Reads ``<project_dir>/visual/lora_registry.json`` as UTF-8. This is a
    best-effort lookup: a missing, unreadable, undecodable, malformed, or
    non-object registry all yield an empty dict instead of an error.

    Args:
        project_dir: Project directory containing the ``visual`` folder.

    Returns:
        The parsed registry dict, or ``{}`` when unavailable.
    """
    reg_path = project_dir / "visual" / "lora_registry.json"
    if not reg_path.is_file():
        return {}
    try:
        with open(reg_path, encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}


def resolve_character_refs(character: str, project_dir: Path,
                           storyboard: dict) -> List[str]:
    """Collect up to five reference-image paths for *character*.

    Lookup order:
      1. ``reference_images`` listed for the (lower-cased) character in the
         storyboard's ``characters`` map, resolved relative to
         ``<project_dir>/visual``; only files that exist are kept.
      2. If that produced nothing, a directory scan of
         ``visual/characters/<character>`` or, when that directory is
         absent, ``visual/refs/characters/heroes``. Image files
         (.png/.jpg/.jpeg/.webp) are kept when the character name appears
         in the file stem, ignoring case and underscores.
    """
    key = character.lower()
    visual_root = project_dir / "visual"

    # 1) Paths declared in the storyboard itself.
    declared = storyboard.get("characters", {}).get(key, {}).get("reference_images", [])
    found = [
        str(visual_root / rel)
        for rel in declared
        if (visual_root / rel).is_file()
    ]

    # 2) Filesystem scan, only when the storyboard gave us nothing usable.
    if not found:
        folder = visual_root / "characters" / key
        if not folder.is_dir():
            # Shared "heroes" folder holding refs for several characters
            folder = visual_root / "refs" / "characters" / "heroes"
        if folder.is_dir():
            needle = key.replace("_", "")
            image_exts = (".png", ".jpg", ".jpeg", ".webp")
            found = [
                str(candidate)
                for candidate in sorted(folder.glob("*"))
                if candidate.suffix.lower() in image_exts
                and needle in candidate.stem.lower().replace("_", "")
            ]

    # Cap at five references to keep token cost bounded.
    return found[:5]


# ── Keyframe QC ───────────────────────────────────────────────────────────

# Prompt template for single-frame (keyframe) QC.
# It is filled with str.format() by build_keyframe_prompt: single-brace fields
# such as {shot_type} or {emotion} are substituted, while doubled braces
# ({{ ... }}) come out as the literal braces of the JSON reply skeleton.
# The score keys requested here are the ones run_keyframe_qc reads back.
KEYFRAME_QC_PROMPT = """You are a visual QC supervisor for an AI-generated vertical microdrama series.

Evaluate the GENERATED IMAGE (first image) against the SHOT SPECIFICATION and CHARACTER REFERENCES.

SHOT SPECIFICATION:
- Shot type: {shot_type}
- Camera angle: {camera_angle}
- Camera movement: {camera_movement}
- Focal length: {focal_length}
- Subject: {subject}
- Action: {action}
- Emotion: {emotion}
- Lighting: {lighting}
- Atmosphere: {atmosphere}
- Color palette: {color_palette}

GENERATION PROMPT USED:
{frame_prompt}

CHARACTER DESCRIPTION:
{character_description}

WARDROBE:
{wardrobe}

The first image is the GENERATED FRAME to evaluate.
{ref_instruction}

Score each dimension 1-10 (10 = perfect):

- character_identity: Does the character match the reference images? Same face, build, age, distinctive features? (If no character in shot or no refs, score based on prompt adherence.)
- expression_match: Does the facial expression and body language convey the intended emotion ({emotion})? Is the emotional register correct for this beat?
- composition_match: Is the framing correct for a {shot_type} at {camera_angle} angle? Rule of thirds, depth staging, focal point placement?
- lighting_match: Does the lighting match the specification? Correct source direction, color temperature, mood, practical vs ambient ratio?
- wardrobe_accuracy: Are clothing, accessories, props, damage state correct as described? Right garments for this character's wardrobe phase?
- emotional_register: Beyond expression — does the overall image FEEL like the intended emotion? Atmosphere, color temperature, depth of field all contributing?
- technical_quality: No anatomical errors, no artifacts, no text, no impossible geometry, clean generation?

Also assess:
- prompt_adherence: Are ALL elements from the generation prompt present? Nothing missing, nothing added?
- regen_recommendation: Should this frame be regenerated? ("keep", "regen_preferred", "regen_required")

Return ONLY valid JSON:
{{
  "character_identity": <int 1-10>,
  "expression_match": <int 1-10>,
  "composition_match": <int 1-10>,
  "lighting_match": <int 1-10>,
  "wardrobe_accuracy": <int 1-10>,
  "emotional_register": <int 1-10>,
  "technical_quality": <int 1-10>,
  "prompt_adherence": <int 1-10>,
  "overall_score": <float, weighted average>,
  "regen_recommendation": "keep|regen_preferred|regen_required",
  "issues": [
    {{
      "category": "<dimension name>",
      "severity": "minor|moderate|major",
      "description": "<specific, actionable description>",
      "prompt_fix": "<suggested prompt adjustment, or null>"
    }}
  ],
  "notes": "<brief overall assessment>"
}}

Scoring calibration:
  9-10: Production ready, no issues
  7-8: Acceptable with minor imperfections (keep unless regen budget allows)
  5-6: Noticeable problems, regen preferred but not blocking
  3-4: Clear failures, regen required
  1-2: Completely wrong, fundamental mismatch

Be honest and calibrated. The goal is to catch real problems, not nitpick."""


def build_keyframe_prompt(shot: dict, storyboard: dict,
                          frame_type: str = "first",
                          num_refs: int = 0) -> str:
    """Render KEYFRAME_QC_PROMPT for one shot.

    Args:
        shot: Shot entry from the storyboard; missing fields fall back to
            empty strings (or "eye" / "static" for camera angle / movement).
        storyboard: Full storyboard; supplies the ``characters`` map and a
            series-level ``color_palette`` fallback.
        frame_type: Which frame is being judged; its ``<frame_type>_frame``
            prompt is used, falling back to ``first_frame`` when empty.
        num_refs: Number of reference images sent after the generated frame.

    Returns:
        The fully formatted prompt text.
    """
    character_table = storyboard.get("characters", {})

    # Generation prompt for the requested frame, else the first frame's.
    generation_prompt = shot.get(f"{frame_type}_frame", "") or shot.get("first_frame", "")

    # Only the first character listed for the shot is described to the model.
    description = ""
    outfit = ""
    cast = shot.get("characters_in_shot", [])
    if cast:
        lead = character_table.get(cast[0].lower(), {})
        description = lead.get("visual", "")
        outfit = lead.get("wardrobe", "")

    refs_line = (
        f"The remaining {num_refs} images are CHARACTER REFERENCE images (identity ground truth)."
        if num_refs > 0
        else "No character reference images are available for comparison."
    )

    # Shot-level palette wins over the storyboard-level one; lists are
    # flattened to a comma-separated string.
    palette = shot.get("color_palette", storyboard.get("color_palette", []))
    if isinstance(palette, list):
        palette = ", ".join(palette)

    fields = {
        "shot_type": shot.get("shot_type", ""),
        "camera_angle": shot.get("camera_angle", "eye"),
        "camera_movement": shot.get("camera_movement", "static"),
        "focal_length": shot.get("focal_length", ""),
        "subject": shot.get("subject", ""),
        "action": shot.get("action", ""),
        "emotion": shot.get("emotion", ""),
        "lighting": shot.get("lighting", ""),
        "atmosphere": shot.get("atmosphere", ""),
        "color_palette": palette,
        "frame_prompt": generation_prompt,
        "character_description": description or "(no character description)",
        "wardrobe": outfit or "(no wardrobe specified)",
        "ref_instruction": refs_line,
    }
    return KEYFRAME_QC_PROMPT.format(**fields)


def run_keyframe_qc(image_path: str, shot: dict, storyboard: dict,
                    ref_paths: List[str], frame_type: str = "first",
                    tracker: Optional[CostTracker] = None,
                    episode: Optional[int] = None,
                    model_override: Optional[str] = None) -> dict:
    """Run deep keyframe QC on a single generated frame.

    Args:
        image_path: Path of the generated frame (sent as the first image).
        shot: Shot entry from the storyboard.
        storyboard: Full storyboard dict.
        ref_paths: Character reference images, sent after the frame.
        frame_type: Frame being judged ("first", "last", "hero", ...).
        tracker: Optional cost tracker; one entry is logged per call.
        episode: Episode number recorded in the cost log.
        model_override: Model name taking precedence over the
            GEMINI_QC_MODEL env override and the keyframe default.

    Returns:
        Structured QC report dict. On success ``qc_result`` is "pass",
        "marginal" or "fail"; when the API call fails or the reply is not
        a JSON object it is "error" and the dict carries an ``error`` string.
    """
    model_name = model_override or MODEL_OVERRIDE or KEYFRAME_MODEL
    model = get_gemini_model(model_name)

    prompt = build_keyframe_prompt(shot, storyboard, frame_type, len(ref_paths))
    all_images = [image_path] + ref_paths

    shot_id = shot.get("id")
    t0 = time.time()

    try:
        scores, usage = call_gemini_vision(model, prompt, all_images)
        # The reply parsed as JSON but may still be a list/scalar; treat
        # that as a failed call instead of crashing on .get() below.
        if not isinstance(scores, dict):
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(scores).__name__}"
            )
    except Exception as e:
        # Boundary handler: any failure becomes an "error" report so batch
        # runs can continue with the next frame.
        elapsed_ms = int((time.time() - t0) * 1000)
        if tracker:
            tracker.log(
                category="qc", provider="gemini", model=model_name,
                tokens_in=0, tokens_out=0, duration_ms=elapsed_ms,
                episode=episode, shot_id=shot_id,
                detail=f"Gemini keyframe QC — error: {str(e)[:100]}",
                success=False,
            )
        return {
            "shot_id": shot_id,
            "frame_type": frame_type,
            "qc_result": "error",
            "error": str(e),
        }

    elapsed_ms = int((time.time() - t0) * 1000)

    # Normalize scores: missing or non-numeric values become 0
    dimensions = ["character_identity", "expression_match", "composition_match",
                  "lighting_match", "wardrobe_accuracy", "emotional_register",
                  "technical_quality", "prompt_adherence"]

    normalized = {}
    for dim in dimensions:
        val = scores.get(dim, 0)
        if not isinstance(val, (int, float)):
            val = 0
        normalized[dim] = int(val)

    # Calculate overall if not provided: mean of the dimensions that were
    # actually scored (zeros mean "missing" and are left out).
    overall = scores.get("overall_score")
    if not isinstance(overall, (int, float)):
        vals = [v for v in normalized.values() if v > 0]
        overall = round(sum(vals) / len(vals), 1) if vals else 0

    # Determine pass/fail: trust the model's recommendation when it is one
    # of the known values, otherwise derive it from the overall score.
    regen = scores.get("regen_recommendation", "keep")
    if regen not in ("keep", "regen_preferred", "regen_required"):
        if overall >= 7:
            regen = "keep"
        elif overall >= 5:
            regen = "regen_preferred"
        else:
            regen = "regen_required"

    if regen == "keep":
        qc_result = "pass"
    elif regen == "regen_preferred":
        qc_result = "marginal"
    else:
        qc_result = "fail"

    # Log cost
    if tracker:
        tracker.log(
            category="qc", provider="gemini", model=model_name,
            tokens_in=usage["tokens_in"], tokens_out=usage["tokens_out"],
            duration_ms=elapsed_ms,
            episode=episode, shot_id=shot_id,
            detail=f"Gemini keyframe QC — {frame_type} frame — {qc_result}",
            success=True,
        )

    issues = scores.get("issues", [])
    if not isinstance(issues, list):
        issues = []

    return {
        "shot_id": shot_id,
        "shot_name": shot.get("name", ""),
        "frame_type": frame_type,
        "qc_result": qc_result,
        "scores": normalized,
        "overall_score": overall,
        "regen_recommendation": regen,
        "issues": issues,
        "notes": scores.get("notes", ""),
        "model": model_name,
        "image": image_path,
        "ref_count": len(ref_paths),
    }


# ── Video QC ──────────────────────────────────────────────────────────────

# Prompt template for video-clip QC.
# It is filled with str.format() in run_video_qc: single-brace fields such as
# {shot_type} or {motion_prompt} are substituted, while doubled braces
# ({{ ... }}) come out as the literal braces of the JSON reply skeleton.
# The score keys requested here are the ones run_video_qc reads back.
VIDEO_QC_PROMPT = """You are a visual QC supervisor evaluating an AI-generated video clip for a vertical microdrama.

SHOT SPECIFICATION:
- Shot type: {shot_type}
- Camera movement: {camera_movement}
- Subject: {subject}
- Action: {action}
- Emotion: {emotion}

MOTION DIRECTION:
{motion_prompt}

FIRST FRAME TARGET:
{first_frame}

LAST FRAME TARGET:
{last_frame}

CHARACTER: {character_name}

The VIDEO CLIP is the generated output to evaluate.
{ref_instruction}

Score each dimension 1-10 (10 = perfect):

- character_consistency: Does the character maintain the same face, build, and features throughout the clip? No morphing, no identity shifts between frames?
- motion_coherence: Is the motion smooth and physically plausible? No teleporting, no impossible physics, no jitter?
- identity_drift: How much does the character's appearance drift from start to end? (10 = no drift, 1 = completely different person by end)
- artifact_presence: (10 = clean, 1 = severe artifacts) Check for: melting, morphing, flickering, ghosting, temporal inconsistency
- emotional_arc: Does the clip convey the intended emotional progression? Does body language and expression match the motion direction?
- keyframe_adherence_first: How closely does the first frame of the clip match the first frame target description?
- keyframe_adherence_last: How closely does the last frame match the last frame target description?
- camera_movement: Does the camera move as specified ({camera_movement})? Correct direction, speed, smoothness?

Return ONLY valid JSON:
{{
  "character_consistency": <int 1-10>,
  "motion_coherence": <int 1-10>,
  "identity_drift": <int 1-10>,
  "artifact_presence": <int 1-10>,
  "emotional_arc": <int 1-10>,
  "keyframe_adherence_first": <int 1-10>,
  "keyframe_adherence_last": <int 1-10>,
  "camera_movement": <int 1-10>,
  "overall_score": <float>,
  "regen_recommendation": "keep|regen_preferred|regen_required",
  "issues": [
    {{
      "category": "<dimension>",
      "severity": "minor|moderate|major",
      "timestamp": "<approx timestamp in clip, e.g. '1.5s'>",
      "description": "<specific issue>"
    }}
  ],
  "notes": "<brief overall assessment>"
}}

Be strict on identity drift and character consistency — these are the hardest problems in AI video and the most important for production quality."""


def run_video_qc(clip_path: str, shot: dict, storyboard: dict,
                 ref_paths: List[str],
                 tracker: Optional[CostTracker] = None,
                 episode: Optional[int] = None,
                 model_override: Optional[str] = None) -> dict:
    """Run video QC on a single generated clip.

    Args:
        clip_path: Path of the generated video clip.
        shot: Shot entry from the storyboard.
        storyboard: Full storyboard dict (kept for interface parity with
            run_keyframe_qc; not read by this function).
        ref_paths: Character reference images sent alongside the clip.
        tracker: Optional cost tracker; one entry is logged per call.
        episode: Episode number recorded in the cost log.
        model_override: Model name taking precedence over the
            GEMINI_QC_MODEL env override and the video default.

    Returns:
        Structured QC report dict. On success ``qc_result`` is "pass",
        "marginal" or "fail"; when the API call fails or the reply is not
        a JSON object it is "error" and the dict carries an ``error`` string.
    """
    model_name = model_override or MODEL_OVERRIDE or VIDEO_MODEL
    model = get_gemini_model(model_name)

    # Build character context: only the first listed character is named.
    chars_in_shot = shot.get("characters_in_shot", [])
    char_name = chars_in_shot[0] if chars_in_shot else "unknown"

    if ref_paths:
        ref_instruction = f"The {len(ref_paths)} image(s) provided are CHARACTER REFERENCE images for identity comparison."
    else:
        ref_instruction = "No character reference images available."

    prompt = VIDEO_QC_PROMPT.format(
        shot_type=shot.get("shot_type", ""),
        camera_movement=shot.get("camera_movement", "static"),
        subject=shot.get("subject", ""),
        action=shot.get("action", ""),
        emotion=shot.get("emotion", ""),
        motion_prompt=shot.get("motion_prompt", "(no motion direction)"),
        first_frame=shot.get("first_frame", "(no first frame description)"),
        last_frame=shot.get("last_frame", "(no last frame description)"),
        character_name=char_name,
        ref_instruction=ref_instruction,
    )

    shot_id = shot.get("id")
    t0 = time.time()

    try:
        scores, usage = call_gemini_vision(
            model, prompt, ref_paths, video_path=clip_path
        )
        # The reply parsed as JSON but may still be a list/scalar; treat
        # that as a failed call instead of crashing on .get() below.
        if not isinstance(scores, dict):
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(scores).__name__}"
            )
    except Exception as e:
        # Boundary handler: any failure becomes an "error" report so batch
        # runs can continue with the next clip.
        elapsed_ms = int((time.time() - t0) * 1000)
        if tracker:
            tracker.log(
                category="qc", provider="gemini", model=model_name,
                tokens_in=0, tokens_out=0, duration_ms=elapsed_ms,
                episode=episode, shot_id=shot_id,
                detail=f"Gemini video QC — error: {str(e)[:100]}",
                success=False,
            )
        return {
            "shot_id": shot_id,
            "qc_result": "error",
            "error": str(e),
        }

    elapsed_ms = int((time.time() - t0) * 1000)

    # Normalize scores: missing or non-numeric values become 0
    dimensions = ["character_consistency", "motion_coherence", "identity_drift",
                  "artifact_presence", "emotional_arc", "keyframe_adherence_first",
                  "keyframe_adherence_last", "camera_movement"]

    normalized = {}
    for dim in dimensions:
        val = scores.get(dim, 0)
        if not isinstance(val, (int, float)):
            val = 0
        normalized[dim] = int(val)

    # Overall falls back to the mean of the dimensions that were scored.
    overall = scores.get("overall_score")
    if not isinstance(overall, (int, float)):
        vals = [v for v in normalized.values() if v > 0]
        overall = round(sum(vals) / len(vals), 1) if vals else 0

    # Trust the model's recommendation when it is a known value, otherwise
    # derive it from the overall score.
    regen = scores.get("regen_recommendation", "keep")
    if regen not in ("keep", "regen_preferred", "regen_required"):
        if overall >= 7:
            regen = "keep"
        elif overall >= 5:
            regen = "regen_preferred"
        else:
            regen = "regen_required"

    if regen == "keep":
        qc_result = "pass"
    elif regen == "regen_preferred":
        qc_result = "marginal"
    else:
        qc_result = "fail"

    if tracker:
        tracker.log(
            category="qc", provider="gemini", model=model_name,
            tokens_in=usage["tokens_in"], tokens_out=usage["tokens_out"],
            duration_ms=elapsed_ms,
            episode=episode, shot_id=shot_id,
            detail=f"Gemini video QC — {qc_result}",
            success=True,
        )

    issues = scores.get("issues", [])
    if not isinstance(issues, list):
        issues = []

    return {
        "shot_id": shot_id,
        "shot_name": shot.get("name", ""),
        "qc_result": qc_result,
        "scores": normalized,
        "overall_score": overall,
        "regen_recommendation": regen,
        "issues": issues,
        "notes": scores.get("notes", ""),
        "model": model_name,
        "clip": clip_path,
        "ref_count": len(ref_paths),
    }


# ── Cross-Episode Regression ─────────────────────────────────────────────

# Prompt template for cross-episode character regression.
# It is filled with str.format() in run_regression using {character_name},
# {character_description} and {num_images}; doubled braces ({{ ... }}) come
# out as the literal braces of the JSON reply skeleton.
# The score keys requested here are the ones run_regression reads back.
REGRESSION_PROMPT = """You are evaluating CHARACTER CONSISTENCY across multiple episodes of an AI-generated microdrama series.

CHARACTER: {character_name}
CHARACTER DESCRIPTION: {character_description}

You are being shown {num_images} images of this character from different episodes/shots.
They should all look like THE SAME PERSON with consistent:
- Facial features (bone structure, eye shape, nose, jaw)
- Build and proportions
- Hair style and color
- Skin tone
- Distinctive features (scars, tattoos, cybernetics, etc.)

Wardrobe may change between episodes — that's intentional. Focus on IDENTITY, not clothing.

Score the batch:

- identity_consistency: Do all images look like the same person? (10 = identical, 1 = completely different people)
- feature_stability: Are distinctive features (as described) present in all images? (scars, eye color, build)
- proportion_stability: Is the character's build/height/proportions consistent across images?
- age_consistency: Does the character appear the same age across all images?
- overall_drift: Overall assessment of character drift (10 = no drift, 1 = severe drift)

Return ONLY valid JSON:
{{
  "identity_consistency": <int 1-10>,
  "feature_stability": <int 1-10>,
  "proportion_stability": <int 1-10>,
  "age_consistency": <int 1-10>,
  "overall_drift": <int 1-10>,
  "drift_detected": <bool>,
  "worst_outliers": [
    {{
      "image_index": <int, 0-based>,
      "issue": "<what's different about this image>"
    }}
  ],
  "notes": "<overall assessment of character consistency>"
}}

Be strict — identity drift is the #1 killer of AI-generated series quality."""


def run_regression(character: str, image_paths: List[str],
                   character_desc: str = "",
                   tracker: Optional[CostTracker] = None,
                   model_override: Optional[str] = None) -> dict:
    """Run cross-episode character regression on a batch of images.

    Args:
        character: Character name (e.g., "jinx")
        image_paths: List of image paths from different episodes/shots
        character_desc: Character visual description from storyboard
        tracker: CostTracker instance
        model_override: Override model selection

    Returns:
        Structured regression report. ``result`` is "consistent" or
        "drift_detected" on success, or "error" (with an ``error`` string)
        when no images were given, the API call failed, or the reply was
        not a JSON object.
    """
    # Nothing to compare: skip the (paid) API call, which could only return
    # meaningless scores for an empty batch.
    if not image_paths:
        return {
            "character": character,
            "result": "error",
            "error": "no images provided for regression",
        }

    model_name = model_override or MODEL_OVERRIDE or REGRESSION_MODEL
    model = get_gemini_model(model_name)

    prompt = REGRESSION_PROMPT.format(
        character_name=character,
        character_description=character_desc or "(no description available)",
        num_images=len(image_paths),
    )

    t0 = time.time()

    try:
        scores, usage = call_gemini_vision(model, prompt, image_paths)
        # The reply parsed as JSON but may still be a list/scalar; treat
        # that as a failed call instead of crashing on .get() below.
        if not isinstance(scores, dict):
            raise ValueError(
                f"Expected a JSON object from Gemini, got {type(scores).__name__}"
            )
    except Exception as e:
        # Boundary handler: any failure becomes an "error" report.
        elapsed_ms = int((time.time() - t0) * 1000)
        if tracker:
            tracker.log(
                category="qc", provider="gemini", model=model_name,
                tokens_in=0, tokens_out=0, duration_ms=elapsed_ms,
                detail=f"Gemini regression — {character} — error: {str(e)[:100]}",
                success=False,
            )
        return {
            "character": character,
            "result": "error",
            "error": str(e),
        }

    elapsed_ms = int((time.time() - t0) * 1000)

    # Normalize scores: missing or non-numeric values become 0
    dimensions = ["identity_consistency", "feature_stability",
                  "proportion_stability", "age_consistency", "overall_drift"]

    normalized = {}
    for dim in dimensions:
        val = scores.get(dim, 0)
        if not isinstance(val, (int, float)):
            val = 0
        normalized[dim] = int(val)

    # Use the model's boolean verdict when it gave one; otherwise flag drift
    # when the overall_drift score is below 7 (a missing score counts as 0).
    drift = scores.get("drift_detected", False)
    if not isinstance(drift, bool):
        drift = normalized.get("overall_drift", 10) < 7

    if tracker:
        tracker.log(
            category="qc", provider="gemini", model=model_name,
            tokens_in=usage["tokens_in"], tokens_out=usage["tokens_out"],
            duration_ms=elapsed_ms,
            detail=f"Gemini regression — {character} — {len(image_paths)} images"
                   f" — drift={'YES' if drift else 'no'}",
            success=True,
        )

    return {
        "character": character,
        "result": "drift_detected" if drift else "consistent",
        "scores": normalized,
        "drift_detected": drift,
        "worst_outliers": scores.get("worst_outliers", []),
        "notes": scores.get("notes", ""),
        "model": model_name,
        "image_count": len(image_paths),
    }


# ── Batch Operations ─────────────────────────────────────────────────────

def find_frame_paths(shot_id: int, project_dir: Path, episode: int) -> Dict[str, str]:
    """Find generated frame paths for a shot.

    Searches, in order, the ``flux2_frames``, ``z_image_frames``,
    ``rough_frames`` and ``assets`` folders under
    ``<project_dir>/storyboards`` for the episode's ``ep_NNN`` directory.
    In each existing directory a ``manifest.json`` is consulted first
    (paths in it are relative to ``project_dir``); if no frame has been
    found so far, files matching ``*_S<NN>_*`` are classified by the words
    "first", "last", "hero"/"anchor" in their name, and an unlabelled image
    is used as "first" when none is known yet.

    Args:
        shot_id: Shot number; zero-padded to two digits for file matching.
        project_dir: Project directory.
        episode: Episode number; zero-padded to three digits.

    Returns:
        Dict with keys like 'first', 'last', 'hero' mapping to file paths
        (empty when nothing was found).
    """
    ep_str = str(episode).zfill(3)
    shot_str = str(shot_id).zfill(2)
    image_exts = (".png", ".jpg", ".jpeg", ".webp")
    frames = {}

    # Search patterns for frame locations
    storyboards = project_dir / "storyboards"
    search_dirs = [
        storyboards / sub / f"ep_{ep_str}"
        for sub in ("flux2_frames", "z_image_frames", "rough_frames", "assets")
    ]

    for search_dir in search_dirs:
        if not search_dir.is_dir():
            continue

        # Try manifest first. A broken or oddly shaped manifest is ignored
        # (best-effort) so the filename fallback below can still run.
        manifest_path = search_dir / "manifest.json"
        if manifest_path.is_file():
            try:
                with open(manifest_path, encoding="utf-8") as f:
                    manifest = json.load(f)
            except (ValueError, OSError):
                # ValueError covers JSONDecodeError and UnicodeDecodeError.
                manifest = None

            frame_data = {}
            if isinstance(manifest, dict):
                all_frames = manifest.get("frames", {})
                if isinstance(all_frames, dict):
                    frame_data = all_frames.get(str(shot_id), {})

            if isinstance(frame_data, dict):
                for frame_type in ("first_frame", "last_frame", "hero_frame"):
                    rel = frame_data.get(frame_type)
                    if rel and isinstance(rel, str):
                        full = project_dir / rel
                        if full.is_file():
                            frames[frame_type.replace("_frame", "")] = str(full)

        # Fallback: glob for shot files. Sorted so the result does not
        # depend on directory listing order.
        if not frames:
            for img in sorted(search_dir.glob(f"*_S{shot_str}_*")):
                if img.suffix.lower() not in image_exts:
                    continue
                name = img.stem.lower()
                if "first" in name:
                    frames["first"] = str(img)
                elif "last" in name:
                    frames["last"] = str(img)
                elif "hero" in name or "anchor" in name:
                    frames["hero"] = str(img)
                elif "first" not in frames:
                    frames["first"] = str(img)

    return frames


def find_clip_path(shot_id: int, project_dir: Path, episode: int) -> Optional[str]:
    """Find generated video clip for a shot.

    Looks in ``video_clips``, ``clips`` and ``assets`` (in that order) under
    ``<project_dir>/storyboards/.../ep_NNN`` for a ``*_S<NN>_*`` file with a
    .mp4, .webm or .mov suffix.

    Args:
        shot_id: Shot number; zero-padded to two digits for file matching.
        project_dir: Project directory.
        episode: Episode number; zero-padded to three digits.

    Returns:
        Path of the first matching clip (by directory priority, then by
        sorted file name), or None when there is no match.
    """
    ep_str = str(episode).zfill(3)
    shot_str = str(shot_id).zfill(2)
    video_exts = (".mp4", ".webm", ".mov")

    storyboards = project_dir / "storyboards"
    search_dirs = [
        storyboards / sub / f"ep_{ep_str}"
        for sub in ("video_clips", "clips", "assets")
    ]

    for search_dir in search_dirs:
        if not search_dir.is_dir():
            continue
        # Sorted so the chosen clip is stable when several files match;
        # raw glob order depends on the filesystem.
        for clip in sorted(search_dir.glob(f"*_S{shot_str}_*")):
            if clip.suffix.lower() in video_exts:
                return str(clip)

    return None


def cmd_keyframe(args):
    """Run keyframe QC on a single frame.

    Reads ``image``, ``storyboard``, ``shot_id``, ``refs``, ``model``,
    ``frame_type`` and ``episode`` from *args*, prints the JSON report on
    stdout and progress/summary lines on stderr.

    Returns:
        Process exit code: 0 when the frame passes, 1 when it is marginal
        or fails, 2 on errors (missing image or a failed QC call).
    """
    if not os.path.exists(args.image):
        print(f"ERROR: Image not found: {args.image}", file=sys.stderr)
        return 2

    # Load storyboard and pick the requested shot (empty spec if absent)
    shot = {}
    storyboard = {}
    if args.storyboard:
        storyboard = load_storyboard(Path(args.storyboard))
        shot = next(
            (s for s in storyboard.get("shots", []) if s.get("id") == args.shot_id),
            {},
        )
        if not shot:
            print(f"WARNING: Shot ID {args.shot_id} not found in storyboard; "
                  "evaluating without a shot specification", file=sys.stderr)

    # Resolve refs: explicit --refs win; otherwise look them up from the
    # project that contains the storyboard.
    ref_paths = []
    if args.refs:
        ref_paths = [r for r in args.refs if os.path.exists(r)]
    elif args.storyboard:
        # Infer project dir from storyboard path: the nearest ancestor that
        # has a "visual" directory. (No project-root lookup is needed here,
        # so a stand-alone storyboard no longer aborts the command.)
        sb_path = Path(args.storyboard).resolve()
        for parent in sb_path.parents:
            if (parent / "visual").is_dir():
                chars_in_shot = shot.get("characters_in_shot", [])
                if chars_in_shot:
                    ref_paths = resolve_character_refs(chars_in_shot[0], parent, storyboard)
                break

    # Determine tracker: costs are logged only when the image lives inside
    # a project directory (one with visual/ and treatment.md).
    tracker = None
    img_path = Path(args.image).resolve()
    for parent in img_path.parents:
        if (parent / "visual").is_dir() and (parent / "treatment.md").is_file():
            tracker = CostTracker(parent)
            break

    print(f"Running keyframe QC on {args.image}...", file=sys.stderr)
    print(f"  Model: {args.model or MODEL_OVERRIDE or KEYFRAME_MODEL}", file=sys.stderr)
    print(f"  Shot: {shot.get('name', 'unknown')} (ID {args.shot_id})", file=sys.stderr)
    print(f"  Refs: {len(ref_paths)}", file=sys.stderr)

    result = run_keyframe_qc(
        args.image, shot, storyboard, ref_paths,
        frame_type=args.frame_type,
        tracker=tracker,
        episode=args.episode,
        model_override=args.model,
    )

    result["timestamp"] = datetime.now(timezone.utc).isoformat()
    print(json.dumps(result, indent=2))

    qc = result.get("qc_result", "error")
    print(f"\n{qc.upper()} — overall {result.get('overall_score', 0)}/10"
          f" — {result.get('regen_recommendation', '?')}", file=sys.stderr)

    # Exit codes follow the module contract: 0 pass, 1 failures/edge cases,
    # 2 error.
    if qc == "pass":
        return 0
    if qc == "error":
        return 2
    return 1


def cmd_batch_keyframes(args):
    """QC every available keyframe of one episode and write a JSON report.

    For each shot in the episode storyboard, every frame returned by
    ``find_frame_paths`` is passed to ``run_keyframe_qc``. Verdicts are
    tallied, the full report is saved under ``storyboards/reviews`` and
    echoed to stdout, and a human-readable summary goes to stderr.

    Returns:
        2 when the project directory does not exist or the storyboard has
        no shots, 1 when at least one frame failed or errored, else 0.
    """
    project_dir = find_project_root() / args.project
    if not project_dir.is_dir():
        print(f"ERROR: Project not found: {args.project}", file=sys.stderr)
        return 2

    ep_str = str(args.episode).zfill(3)
    storyboard = load_storyboard(
        project_dir / "storyboards" / f"storyboard_ep_{ep_str}.json"
    )
    shots = storyboard.get("shots", [])
    if not shots:
        print("ERROR: No shots in storyboard", file=sys.stderr)
        return 2

    tracker = CostTracker(project_dir)
    summary = dict.fromkeys(("pass", "marginal", "fail", "skipped", "error"), 0)
    results = {}
    final_shot_index = len(shots) - 1

    for index, shot in enumerate(shots):
        # Shots without an explicit id fall back to their 1-based position.
        shot_id = shot.get("id", index + 1)
        shot_name = shot.get("name", "")
        frame_paths = find_frame_paths(shot_id, project_dir, args.episode)

        if not frame_paths:
            print(f"  Shot {shot_id} ({shot_name}): SKIP (no frames)",
                  file=sys.stderr)
            summary["skipped"] += 1
            continue

        # Only the first listed character is used to resolve reference images.
        cast = shot.get("characters_in_shot", [])
        ref_paths = (
            resolve_character_refs(cast[0], project_dir, storyboard)
            if cast else []
        )

        last_frame_type = list(frame_paths.keys())[-1]
        per_frame = {}
        for frame_type, frame_path in frame_paths.items():
            print(f"  Shot {shot_id} ({shot_name}) [{frame_type}]: ",
                  end="", file=sys.stderr, flush=True)

            verdict = run_keyframe_qc(
                frame_path, shot, storyboard, ref_paths,
                frame_type=frame_type,
                tracker=tracker,
                episode=args.episode,
                model_override=args.model,
            )
            per_frame[frame_type] = verdict

            qc = verdict.get("qc_result", "error")
            print(f"{qc.upper()} ({verdict.get('overall_score', 0)}/10)",
                  file=sys.stderr)

            # Unknown verdict strings are booked as errors.
            summary[qc if qc in summary else "error"] += 1

            # Rate limit: pause after every call except the last frame of
            # the last storyboard shot.
            if not (index == final_shot_index and frame_type == last_frame_type):
                time.sleep(3)

        results[str(shot_id)] = per_frame

    output = {
        "episode": args.episode,
        "project": args.project,
        "model": args.model or MODEL_OVERRIDE or KEYFRAME_MODEL,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "summary": summary,
        "shots": results,
    }

    # Persist the report next to the storyboards, then echo it on stdout.
    reviews_dir = project_dir / "storyboards" / "reviews"
    reviews_dir.mkdir(parents=True, exist_ok=True)
    output_path = reviews_dir / f"gemini_qc_keyframes_ep_{ep_str}.json"
    with open(output_path, "w") as report_file:
        json.dump(output, report_file, indent=2)

    print(json.dumps(output, indent=2))

    total = sum(summary.values())
    print(f"\nEpisode {args.episode} Keyframe QC — {total} frames processed",
          file=sys.stderr)
    summary_rows = (
        ("  Pass:     ", "pass"),
        ("  Marginal: ", "marginal"),
        ("  Fail:     ", "fail"),
        ("  Skipped:  ", "skipped"),
        ("  Error:    ", "error"),
    )
    for prefix, key in summary_rows:
        print(f"{prefix}{summary[key]}", file=sys.stderr)
    print(f"Saved to: {output_path}", file=sys.stderr)

    has_problems = summary["fail"] > 0 or summary["error"] > 0
    return 1 if has_problems else 0


def cmd_video(args):
    """QC a single video clip and print the result as JSON.

    The shot spec is looked up in the storyboard by ``args.shot_id`` (an
    empty spec is used when there is no storyboard or no match). Reference
    images come from ``--refs`` when given; otherwise they are resolved for
    the first character of the shot, relative to the nearest ancestor of the
    storyboard path that contains a ``visual`` directory.

    Returns:
        2 if the clip does not exist, 0 if the verdict is "pass", else 1.
    """
    if not os.path.exists(args.clip):
        print(f"ERROR: Clip not found: {args.clip}", file=sys.stderr)
        return 2

    storyboard = load_storyboard(Path(args.storyboard)) if args.storyboard else {}
    shot = next(
        (s for s in storyboard.get("shots", []) if s.get("id") == args.shot_id),
        {},
    )

    ref_paths = []
    if args.refs:
        # Explicit refs win; paths that do not exist are dropped silently.
        ref_paths = [ref for ref in args.refs if os.path.exists(ref)]
    elif args.storyboard:
        storyboard_file = Path(args.storyboard).resolve()
        visual_parent = next(
            (d for d in storyboard_file.parents if (d / "visual").is_dir()),
            None,
        )
        cast = shot.get("characters_in_shot", [])
        if visual_parent is not None and cast:
            ref_paths = resolve_character_refs(cast[0], visual_parent, storyboard)

    # Cost tracking is enabled only when the clip lives inside a directory
    # tree that has both a visual/ directory and a treatment.md file.
    clip_file = Path(args.clip).resolve()
    tracker = next(
        (
            CostTracker(d)
            for d in clip_file.parents
            if (d / "visual").is_dir() and (d / "treatment.md").is_file()
        ),
        None,
    )

    print(f"Running video QC on {args.clip}...", file=sys.stderr)
    print(f"  Model: {args.model or MODEL_OVERRIDE or VIDEO_MODEL}", file=sys.stderr)

    result = run_video_qc(
        args.clip, shot, storyboard, ref_paths,
        tracker=tracker,
        episode=args.episode,
        model_override=args.model,
    )
    result["timestamp"] = datetime.now(timezone.utc).isoformat()
    print(json.dumps(result, indent=2))

    qc = result.get("qc_result", "error")
    print(f"\n{qc.upper()} — overall {result.get('overall_score', 0)}/10",
          file=sys.stderr)

    if qc == "pass":
        return 0
    return 1


def cmd_batch_video(args):
    """QC every available video clip of one episode and write a JSON report.

    For each storyboard shot the clip is located with ``find_clip_path`` and
    passed to ``run_video_qc``. Shots without a clip are counted as skipped,
    unless their ``generation_approach`` is a held-frame variant, in which
    case they are ignored without being counted. The report is saved under
    ``storyboards/reviews`` and echoed to stdout.

    Returns:
        2 when the project directory does not exist or the storyboard has
        no shots, 1 when at least one clip failed or errored, else 0.
    """
    held_frame_approaches = ("held_frame_static", "held_frame_push")

    project_dir = find_project_root() / args.project
    if not project_dir.is_dir():
        print(f"ERROR: Project not found: {args.project}", file=sys.stderr)
        return 2

    ep_str = str(args.episode).zfill(3)
    storyboard = load_storyboard(
        project_dir / "storyboards" / f"storyboard_ep_{ep_str}.json"
    )
    shots = storyboard.get("shots", [])
    if not shots:
        print("ERROR: No shots in storyboard", file=sys.stderr)
        return 2

    tracker = CostTracker(project_dir)
    summary = dict.fromkeys(("pass", "marginal", "fail", "skipped", "error"), 0)
    results = {}
    final_shot_index = len(shots) - 1

    for index, shot in enumerate(shots):
        # Shots without an explicit id fall back to their 1-based position.
        shot_id = shot.get("id", index + 1)
        shot_name = shot.get("name", "")
        clip_path = find_clip_path(shot_id, project_dir, args.episode)

        if not clip_path:
            # Held-frame shots are expected to have no video, so only other
            # clip-less shots are reported and counted as skipped.
            if shot.get("generation_approach", "") not in held_frame_approaches:
                print(f"  Shot {shot_id} ({shot_name}): SKIP (no clip)",
                      file=sys.stderr)
                summary["skipped"] += 1
            continue

        # Only the first listed character is used to resolve reference images.
        cast = shot.get("characters_in_shot", [])
        ref_paths = (
            resolve_character_refs(cast[0], project_dir, storyboard)
            if cast else []
        )

        print(f"  Shot {shot_id} ({shot_name}): ",
              end="", file=sys.stderr, flush=True)

        verdict = run_video_qc(
            clip_path, shot, storyboard, ref_paths,
            tracker=tracker, episode=args.episode,
            model_override=args.model,
        )
        results[str(shot_id)] = verdict

        qc = verdict.get("qc_result", "error")
        print(f"{qc.upper()} ({verdict.get('overall_score', 0)}/10)",
              file=sys.stderr)

        # Unknown verdict strings are booked as errors.
        summary[qc if qc in summary else "error"] += 1

        # Rate limit between calls; no pause after the last storyboard shot.
        if index != final_shot_index:
            time.sleep(5)

    output = {
        "episode": args.episode,
        "project": args.project,
        "model": args.model or MODEL_OVERRIDE or VIDEO_MODEL,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "summary": summary,
        "shots": results,
    }

    # Persist the report next to the storyboards, then echo it on stdout.
    reviews_dir = project_dir / "storyboards" / "reviews"
    reviews_dir.mkdir(parents=True, exist_ok=True)
    output_path = reviews_dir / f"gemini_qc_video_ep_{ep_str}.json"
    with open(output_path, "w") as report_file:
        json.dump(output, report_file, indent=2)

    print(json.dumps(output, indent=2))

    total = sum(summary.values())
    print(f"\nEpisode {args.episode} Video QC — {total} clips processed",
          file=sys.stderr)
    summary_rows = (
        ("  Pass:     ", "pass"),
        ("  Marginal: ", "marginal"),
        ("  Fail:     ", "fail"),
        ("  Skipped:  ", "skipped"),
        ("  Error:    ", "error"),
    )
    for prefix, key in summary_rows:
        print(f"{prefix}{summary[key]}", file=sys.stderr)
    print(f"Saved to: {output_path}", file=sys.stderr)

    has_problems = summary["fail"] > 0 or summary["error"] > 0
    return 1 if has_problems else 0


def _parse_episode_spec(spec):
    """Expand an ``--episodes`` spec into a list of episode numbers.

    The spec is a comma-separated list whose items are either single
    numbers or inclusive ``start-end`` ranges, e.g. ``"1-10"``,
    ``"1,5,10,15"`` or ``"1-3,7"``.

    Raises:
        ValueError: if an item is not an integer or a two-sided range.
    """
    episodes = []
    for token in spec.split(","):
        if "-" in token:
            # Exactly one dash is allowed; the unpacking raises ValueError
            # for anything else (e.g. "1-2-3").
            start, end = token.split("-")
            episodes.extend(range(int(start), int(end) + 1))
        else:
            episodes.append(int(token))
    return episodes


def cmd_regression(args):
    """Run cross-episode character regression.

    Collects one representative frame per episode for ``args.character``
    (the first shot listing the character that has a frame on disk,
    preferring hero > first > last), passes the frames to
    ``run_regression`` and saves the report under ``storyboards/reviews``.

    Returns:
        2 on error (invalid ``--episodes`` value, unknown project, or fewer
        than two frames found), 1 if the result reports drift, else 0.
    """
    # Validate the episode spec before touching the filesystem so that a
    # typo yields the documented error exit code instead of a traceback.
    try:
        ep_range = _parse_episode_spec(args.episodes)
    except ValueError:
        print(f"ERROR: Invalid --episodes value: {args.episodes!r} "
              "(expected e.g. 1-10 or 1,5,10,15)", file=sys.stderr)
        return 2

    project_root = find_project_root()
    project_dir = project_root / args.project

    if not project_dir.is_dir():
        print(f"ERROR: Project not found: {args.project}", file=sys.stderr)
        return 2

    character = args.character.lower()

    # Collect one representative frame per episode for this character, and
    # pick up the character description from the first storyboard that has
    # one (each storyboard is loaded only once).
    collected_images = []
    collected_info = []
    char_desc = ""

    for ep_num in ep_range:
        ep_str = str(ep_num).zfill(3)
        sb_path = project_dir / "storyboards" / f"storyboard_ep_{ep_str}.json"
        if not sb_path.is_file():
            continue

        storyboard = load_storyboard(sb_path)

        if not char_desc:
            char_data = storyboard.get("characters", {}).get(character, {})
            char_desc = char_data.get("visual", "")

        # Find the first shot with this character that has a frame on disk
        for shot in storyboard.get("shots", []):
            chars_in_shot = [c.lower() for c in shot.get("characters_in_shot", [])]
            if character not in chars_in_shot:
                continue
            shot_id = shot.get("id")
            frame_paths = find_frame_paths(shot_id, project_dir, ep_num)
            # Prefer hero > first > last
            frame = (frame_paths.get("hero")
                     or frame_paths.get("first")
                     or frame_paths.get("last"))
            if frame:
                collected_images.append(frame)
                collected_info.append({"episode": ep_num, "shot_id": shot_id,
                                       "path": frame})
                break

    if len(collected_images) < 2:
        print(f"ERROR: Need at least 2 images for regression, found {len(collected_images)}",
              file=sys.stderr)
        return 2

    # Gemini has image limits — subsample evenly down to at most 16 images
    MAX_IMAGES = 16
    if len(collected_images) > MAX_IMAGES:
        print(f"  Sampling {MAX_IMAGES} images from {len(collected_images)} available",
              file=sys.stderr)
        step = len(collected_images) / MAX_IMAGES
        indices = [int(i * step) for i in range(MAX_IMAGES)]
        collected_images = [collected_images[i] for i in indices]
        collected_info = [collected_info[i] for i in indices]

    tracker = CostTracker(project_dir)

    print(f"Running regression for {character} across {len(collected_images)} episodes...",
          file=sys.stderr)
    print(f"  Model: {args.model or MODEL_OVERRIDE or REGRESSION_MODEL}", file=sys.stderr)

    result = run_regression(
        character, collected_images, char_desc,
        tracker=tracker,
        model_override=args.model,
    )

    result["episodes_checked"] = [info["episode"] for info in collected_info]
    result["image_sources"] = collected_info
    result["timestamp"] = datetime.now(timezone.utc).isoformat()

    # Save report
    reviews_dir = project_dir / "storyboards" / "reviews"
    reviews_dir.mkdir(parents=True, exist_ok=True)
    ep_label = args.episodes.replace(",", "_").replace("-", "_")
    output_path = reviews_dir / f"gemini_regression_{character}_ep{ep_label}.json"
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    print(json.dumps(result, indent=2))

    drift = result.get("drift_detected", False)
    overall = result.get("scores", {}).get("overall_drift", 0)
    status = "DRIFT DETECTED" if drift else "CONSISTENT"
    print(f"\n{status} — drift score {overall}/10", file=sys.stderr)
    print(f"Saved to: {output_path}", file=sys.stderr)

    return 1 if drift else 0


def cmd_calibrate(args):
    """Compare Gemini QC scores to human judgment for calibration.

    Expects a human scores JSON file with structure:
    {
      "shots": {
        "3": {"human_score": 7, "human_verdict": "keep", "notes": "..."},
        ...
      }
    }

    The Gemini side is read from the report written by ``batch-keyframes``
    for the same episode. Verdicts are reduced to keep-vs-regen and compared
    shot by shot; shots that are missing or errored on the Gemini side are
    left out of the comparison.

    Returns:
        2 if the project, the Gemini report or the human scores file is
        missing, 0 if the agreement rate is at least 70%, else 1.
    """
    project_root = find_project_root()
    project_dir = project_root / args.project

    # Same up-front check as the other project-level commands, so a typo in
    # --project is not reported as "run batch-keyframes first".
    if not project_dir.is_dir():
        print(f"ERROR: Project not found: {args.project}", file=sys.stderr)
        return 2

    ep_str = str(args.episode).zfill(3)

    # Load Gemini QC results
    reviews_dir = project_dir / "storyboards" / "reviews"
    gemini_path = reviews_dir / f"gemini_qc_keyframes_ep_{ep_str}.json"
    if not gemini_path.is_file():
        print("ERROR: No Gemini QC results found. Run batch-keyframes first.",
              file=sys.stderr)
        return 2

    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(gemini_path, encoding="utf-8") as f:
        gemini_data = json.load(f)

    # Load human scores
    if not os.path.exists(args.human_scores):
        print(f"ERROR: Human scores file not found: {args.human_scores}",
              file=sys.stderr)
        return 2

    with open(args.human_scores, encoding="utf-8") as f:
        human_data = json.load(f)

    # Compare
    agreements = 0
    disagreements = 0
    comparisons = []

    human_shots = human_data.get("shots", {})
    gemini_shots = gemini_data.get("shots", {})

    for shot_id, human in human_shots.items():
        if not isinstance(human, dict):
            # Malformed human entry (e.g. a bare number): nothing to compare.
            continue

        gemini = gemini_shots.get(shot_id, {})
        if isinstance(gemini, dict) and not gemini.get("qc_result"):
            # Nested by frame type — take the first frame type's result
            gemini = next(iter(gemini.values()), {})

        if (not isinstance(gemini, dict) or not gemini
                or gemini.get("qc_result") == "error"):
            continue

        human_verdict = human.get("human_verdict", "keep")
        gemini_verdict = gemini.get("regen_recommendation", "keep")

        # Map to binary: keep vs regen
        h_keep = human_verdict in ("keep", "pass")
        g_keep = gemini_verdict == "keep"

        agree = h_keep == g_keep
        if agree:
            agreements += 1
        else:
            disagreements += 1

        comparisons.append({
            "shot_id": shot_id,
            "human_score": human.get("human_score"),
            "gemini_score": gemini.get("overall_score"),
            "human_verdict": human_verdict,
            "gemini_verdict": gemini_verdict,
            "agreement": agree,
        })

    total = agreements + disagreements
    agreement_rate = round(agreements / total * 100, 1) if total else 0

    output = {
        "episode": args.episode,
        "total_compared": total,
        "agreements": agreements,
        "disagreements": disagreements,
        "agreement_rate": agreement_rate,
        "calibration_grade": (
            "excellent" if agreement_rate >= 90 else
            "good" if agreement_rate >= 80 else
            "acceptable" if agreement_rate >= 70 else
            "needs_tuning" if agreement_rate >= 60 else
            "unreliable"
        ),
        "comparisons": comparisons,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

    print(json.dumps(output, indent=2))

    if not total:
        # A 0% rate here means "no data", not "Gemini disagrees with humans".
        print("WARNING: No shots could be compared — the grade below is not "
              "meaningful", file=sys.stderr)

    print(f"\nCalibration: {agreement_rate}% agreement ({agreements}/{total})",
          file=sys.stderr)
    print(f"Grade: {output['calibration_grade'].upper()}", file=sys.stderr)

    return 0 if agreement_rate >= 70 else 1


# ── CLI ───────────────────────────────────────────────────────────────────

def build_parser():
    """Build the argparse parser with one sub-command per QC mode.

    The chosen sub-command name is stored in ``args.command``; it is
    ``None`` when no sub-command was given. The module docstring is used as
    the help epilog.
    """
    root = argparse.ArgumentParser(
        description="Gemini Visual QC Pipeline — keyframe, video, and regression analysis",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    modes = root.add_subparsers(dest="command", help="QC mode")

    def add_model_flag(target):
        # Every QC-running mode accepts the same model override.
        target.add_argument("--model", help="Override Gemini model")

    def add_project_flag(target):
        target.add_argument("--project", required=True, help="Project name")

    def add_required_episode(target):
        target.add_argument("--episode", type=int, required=True,
                            help="Episode number")

    # keyframe — single frame
    keyframe = modes.add_parser("keyframe", help="Deep QC on a single keyframe")
    keyframe.add_argument("--image", required=True, help="Path to generated frame")
    keyframe.add_argument("--storyboard", help="Storyboard JSON path")
    keyframe.add_argument("--shot-id", type=int, default=0,
                          help="Shot ID in storyboard")
    keyframe.add_argument("--episode", type=int, help="Episode number")
    keyframe.add_argument("--frame-type", default="first",
                          choices=["first", "last", "hero"],
                          help="Which frame type (default: first)")
    keyframe.add_argument("--refs", nargs="+",
                          help="Character reference image paths")
    add_model_flag(keyframe)

    # batch-keyframes — all frames for an episode
    batch_keyframes = modes.add_parser("batch-keyframes",
                                       help="QC all keyframes for an episode")
    add_project_flag(batch_keyframes)
    add_required_episode(batch_keyframes)
    add_model_flag(batch_keyframes)

    # video — single clip
    video = modes.add_parser("video", help="QC a single video clip")
    video.add_argument("--clip", required=True, help="Path to video clip")
    video.add_argument("--storyboard", help="Storyboard JSON path")
    video.add_argument("--shot-id", type=int, default=0, help="Shot ID")
    video.add_argument("--episode", type=int, help="Episode number")
    video.add_argument("--refs", nargs="+", help="Character reference image paths")
    add_model_flag(video)

    # batch-video — all clips for an episode
    batch_video = modes.add_parser("batch-video",
                                   help="QC all video clips for an episode")
    add_project_flag(batch_video)
    add_required_episode(batch_video)
    add_model_flag(batch_video)

    # regression — cross-episode character drift
    regression = modes.add_parser("regression",
                                  help="Cross-episode character drift check")
    add_project_flag(regression)
    regression.add_argument("--character", required=True, help="Character name")
    regression.add_argument("--episodes", required=True,
                            help="Episode range (e.g., 1-10 or 1,5,10,15)")
    add_model_flag(regression)

    # calibrate — compare to human judgment (no model flag: it only reads
    # previously saved results)
    calibrate = modes.add_parser("calibrate",
                                 help="Compare Gemini vs human QC scores")
    add_project_flag(calibrate)
    add_required_episode(calibrate)
    calibrate.add_argument("--human-scores", required=True,
                           help="Path to human scores JSON")

    return root


def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Returns:
        The handler's exit code; 0 after printing help when no sub-command
        was given; 2 after printing help when the sub-command has no handler.
    """
    parser = build_parser()
    args = parser.parse_args()

    # Running the script with no sub-command just shows usage.
    if not args.command:
        parser.print_help()
        return 0

    dispatch = {
        "keyframe": cmd_keyframe,
        "batch-keyframes": cmd_batch_keyframes,
        "video": cmd_video,
        "batch-video": cmd_batch_video,
        "regression": cmd_regression,
        "calibrate": cmd_calibrate,
    }

    try:
        handler = dispatch[args.command]
    except KeyError:
        parser.print_help()
        return 2

    return handler(args)


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())