#!/usr/bin/env python3
"""
Storyboard verification script.

Validates that a storyboard JSON faithfully covers the source episode script
with no gaps, no hallucinated content, and correct structural integrity.

Usage:
  python3 validate_storyboard.py storyboard.json episode.md
  python3 validate_storyboard.py storyboard.json episode.md --json
  python3 validate_storyboard.py storyboard.json episode.md --prompt

Exit codes:
  0 = valid
  1 = errors found
  2 = file/parse error
"""

# ╔════════════════════════════════════════════════════════════════════╗
# ║ DEPRECATED — Superseded by Starsend equivalents (Feb 2026).      ║
# ║ Kept alive for Recoil agent protocols + referencing scripts.     ║
# ║ Do NOT delete until agents/breakdown_agent.md, storyboard_agent, ║
# ║ engine_checks/structural.py, and batch_threepass.py are updated. ║
# ╚════════════════════════════════════════════════════════════════════╝

import argparse
import json
import os
import re
import sys


# ── Shot Grammar Constants ──

# Ordinal rank of each framing scale, tightest (ECU = 0) to widest (WIDE = 5).
# The grammar checks subtract ranks to measure how many "steps" apart two
# adjacent shots are.
SCALE_ORDER = {"ECU": 0, "CU": 1, "MCU": 2, "MS": 3, "LS": 4, "WIDE": 5}
# Scale groupings consulted by the rhythm, scene-boundary and scene-coverage
# checks in check_shot_grammar().
TIGHT_SCALES = {"ECU", "CU", "MCU"}
# NOTE(review): MEDIUM_SCALES is not referenced in the portion of the file
# reviewed here — confirm whether it is still used.
MEDIUM_SCALES = {"MS"}
WIDE_SCALES = {"LS", "WIDE"}
# Shot types that have no rank in SCALE_ORDER; the adjacent-shot grammar
# checks skip any pair that involves one of these.
SPECIAL_TYPES = {"POV", "VFX"}


# ── Prose Spec v1.0 Constants (Gemini-validated, Feb 2026) ──

# Compiled, case-insensitive patterns for terms that must not appear in the
# first_frame/last_frame/hero_frame prose. check_prose_spec() runs every
# pattern against each frame text and reports the matched terms as warnings.
_PROSE_BANNED_PATTERNS = [
    # Camera/lens/shot type
    re.compile(r"\b(?:close-up|close up|medium shot|wide shot|long shot|extreme|establishing)\b", re.I),
    re.compile(r"\b(?:eye level|low angle|high angle|oblique|dutch)\b", re.I),
    re.compile(r"\b\d+mm\b|\bf/\d\.\d\b|\b(?:focal|aperture|lens|depth of field|dof|bokeh)\b", re.I),
    re.compile(r"\b(?:pov|ots|over the shoulder|point of view)\b", re.I),
    # Camera movement (T2I poison)
    re.compile(r"\b(?:pans?|zooms?|tilts?|tracking|crane)\b", re.I),
    # Film stock/style
    re.compile(r"\b(?:kodak|vision3|arri|alexa|film stock|visible grain|cinematic|chiaroscuro)\b", re.I),
    re.compile(r"\b(?:documentary|photorealistic|naturalistic|film grain)\b", re.I),
    re.compile(r"\b(?:moody|epic|dramatic lighting|studio lighting)\b", re.I),
    # Narrative voice
    re.compile(r"\b(?:we see|the audience|suddenly|begins to|appears to)\b", re.I),
    re.compile(r"\bcamera\b", re.I),
    # Abstract emotion
    re.compile(r"\b(?:tension|dread|hope|menace|ominous|foreboding)\b", re.I),
    # VFX/CGI
    re.compile(r"\b(?:holographic|targeting reticle|data overlay|heads-up|HUD|scan lines)\b", re.I),
]

# Word budget per shot_type as (min, max) word counts. Shots with two or more
# characters ignore this table and use _PROSE_TWO_CHAR_BUDGET instead.
# NOTE(review): this comment previously quoted (40, 60) for two-character
# shots, but the constant below is (30, 65) — confirm which is intended.
_PROSE_WORD_BUDGETS = {
    "ECU": (15, 30),
    "CU": (20, 40),
    "MCU": (20, 40),
    "MS": (25, 50),
    "LS": (25, 50),
    "WIDE": (25, 50),
}
_PROSE_TWO_CHAR_BUDGET = (30, 65)


def check_prose_spec(shots):
    """Validate first_frame/last_frame/hero_frame against Prose Spec v1.0.

    For every non-empty frame field the text is scanned with each pattern in
    ``_PROSE_BANNED_PATTERNS`` and its whitespace-separated word count is
    compared with the maximum of the applicable budget:
    ``_PROSE_TWO_CHAR_BUDGET`` when the shot lists two or more characters,
    otherwise the ``_PROSE_WORD_BUDGETS`` entry for its ``shot_type``
    (``(20, 50)`` when the type has no entry).

    Args:
        shots: iterable of shot dicts taken from the storyboard.

    Returns:
        A single list of warning strings (there is no separate error list).
        Prose violations are warnings by default to avoid blocking
        storyboards during migration; promote them to errors once the
        migration is complete.
    """
    warnings = []

    for shot in shots:
        sid = shot.get("id", "?")
        name = shot.get("name", "unnamed")
        prefix = f"Shot #{sid} '{name}'"
        shot_type = shot.get("shot_type", "MS")
        # A JSON null for characters_in_shot means "no characters", not a crash.
        chars = shot.get("characters_in_shot") or []
        is_two_char = len(chars) >= 2

        for field_name in ("first_frame", "last_frame", "hero_frame"):
            text = shot.get(field_name)
            if not text:
                continue
            if not isinstance(text, str):
                # Report malformed data instead of failing on .lower()/.split().
                warnings.append(
                    f"PROSE: {prefix} {field_name} is not a string "
                    f"(got {type(text).__name__})"
                )
                continue

            # Matching against the lowercased text keeps the reported terms
            # lowercase regardless of how they were written in the prose.
            text_lower = text.lower()
            word_count = len(text.split())

            # Check banned terms (one warning per pattern that matched)
            for pattern in _PROSE_BANNED_PATTERNS:
                matches = pattern.findall(text_lower)
                if matches:
                    # findall yields tuples if a pattern ever gains capture groups.
                    unique = {m if isinstance(m, str) else m[0] for m in matches}
                    warnings.append(
                        f"PROSE: {prefix} {field_name} contains banned term(s): "
                        f"{', '.join(sorted(unique))}"
                    )

            # Check word budget
            if is_two_char:
                budget_min, budget_max = _PROSE_TWO_CHAR_BUDGET
            else:
                budget_min, budget_max = _PROSE_WORD_BUDGETS.get(
                    shot_type, (20, 50)
                )

            # NOTE(review): only the maximum is enforced; the minimum is shown
            # in the message but under-length prose is not flagged — confirm
            # whether that is intentional.
            if word_count > budget_max:
                warnings.append(
                    f"PROSE: {prefix} {field_name} is {word_count} words "
                    f"(budget: {budget_min}-{budget_max} for {shot_type}"
                    f"{' two-char' if is_two_char else ''})"
                )

    return warnings


def _load_shot_count_target(project_path=None):
    """Load shot count target from project config, then CONSTANTS.md fallback.

    Resolution order:
      1. project_config.json → shots_per_episode (genre-specific)
      2. visual_grammar_bible.md → (future: parse from genre doc)
      3. CONSTANTS.md → SHOTS_PER_EPISODE (universal default)
      4. Hardcoded fallback: 18-24
    """
    # Try project config
    if project_path:
        config_path = os.path.join(project_path, "visual", "project_config.json")
        if os.path.exists(config_path):
            try:
                with open(config_path) as f:
                    config = json.load(f)
                spe = config.get("shots_per_episode")
                if spe:
                    if isinstance(spe, dict):
                        return spe.get("min", 18), spe.get("max", 24)
                    if isinstance(spe, str) and "-" in spe:
                        parts = spe.split("-")
                        return int(parts[0]), int(parts[1])
            except (json.JSONDecodeError, ValueError):
                pass

    # Try CONSTANTS.md
    constants_path = os.path.join(os.path.dirname(__file__), "..", "CONSTANTS.md")
    if os.path.exists(constants_path):
        try:
            with open(constants_path) as f:
                content = f.read()
            m = re.search(r"`SHOTS_PER_EPISODE`\s*\|\s*(\d+)-(\d+)", content)
            if m:
                return int(m.group(1)), int(m.group(2))
        except (IOError, ValueError):
            pass

    return 18, 24  # Hardcoded fallback


# ── Episode Parsing ──

# Beat names every episode/storyboard is expected to contain, in story order.
REQUIRED_BEATS = [
    "THE HOOK",
    "THE SETUP",
    "THE ESCALATION",
    "THE TURN",
    "THE CLIFFHANGER",
]

# Beat header line, e.g. "# [00:00 - 00:15] THE HOOK".
# Groups: (1) start time, (2) end time, (3) beat name.
BEAT_PATTERN = re.compile(
    r"^#\s*\[(\d{2}:\d{2})\s*-\s*(\d{2}:\d{2})\]\s*(THE\s+\w+)", re.IGNORECASE
)

# Screenplay slug-line prefixes (INT./EXT./INT/EXT./I/E.).
SCENE_HEADING_PATTERN = re.compile(r"^(INT\.|EXT\.|INT/EXT\.|I/E\.)", re.IGNORECASE)
# An all-caps cue of 2-41 characters made of letters, digits and spaces.
CHARACTER_PATTERN = re.compile(r"^[A-Z][A-Z0-9 ]{1,40}$")
# Markdown "**Key**..." lines used as front-matter metadata.
METADATA_PATTERN = re.compile(r"^\*\*[^*]+\*\*")


def parse_episode(text):
    """Parse an episode .md file into beats and content lines.

    Lines before the first beat header are ignored. A ``---`` line seen after
    a beat has started switches the parser into "metadata" mode, in which
    lines are skipped until the next beat header. The header is recognised
    before the metadata skip, so a ``---`` separator between beats no longer
    swallows every later beat.

    Each content line is classified as ``scene_heading``, ``character``,
    ``parenthetical``, ``dialogue`` (any line following a character,
    parenthetical or dialogue line without a blank line in between) or
    ``action``.

    Args:
        text: full text of the episode file.

    Returns:
      beats: list of {name, time_start, time_end, lines: [{type, text, line_num}]}
      raw_lines: all non-metadata content lines with their line numbers
    """
    beats = []
    raw_lines = []
    current_beat = None
    in_metadata = False
    prev_type = None

    for i, line in enumerate(text.split("\n"), 1):
        stripped = line.strip()

        # Blank lines end a dialogue run.
        if not stripped:
            prev_type = None
            continue

        # Skip front matter metadata
        if stripped.startswith("# Episode") and i <= 3:
            continue
        if METADATA_PATTERN.match(stripped) and not current_beat:
            continue
        if stripped == "---":
            if current_beat:
                in_metadata = True
            continue
        if stripped.startswith("[[EPISODE"):
            continue
        if stripped == "===":
            continue

        # Beat header. This must be tested BEFORE the metadata skip below,
        # otherwise the in_metadata reset can never run and one "---" after
        # the first beat hides all remaining beats.
        m = BEAT_PATTERN.match(stripped)
        if m:
            in_metadata = False
            current_beat = {
                # Collapse internal whitespace so "THE   HOOK" == "THE HOOK".
                "name": " ".join(m.group(3).upper().split()),
                "time_start": m.group(1),
                "time_end": m.group(2),
                "lines": [],
            }
            beats.append(current_beat)
            prev_type = "beat_header"
            continue

        # Metadata after a "---" (until the next beat header, if any)
        if in_metadata:
            continue

        if not current_beat:
            continue

        # Classify the line
        if SCENE_HEADING_PATTERN.match(stripped):
            line_type = "scene_heading"
        elif CHARACTER_PATTERN.match(stripped) and len(stripped) < 50:
            # Exclude common false positives
            if stripped not in ("AR OVERLAY", "FADE TO BLACK", "CUT TO"):
                line_type = "character"
            else:
                line_type = "action"
        elif stripped.startswith("(") and stripped.endswith(")"):
            line_type = "parenthetical"
        elif prev_type in ("character", "parenthetical", "dialogue"):
            line_type = "dialogue"
        else:
            line_type = "action"

        # The same dict object is shared by the beat and raw_lines.
        entry = {"type": line_type, "text": stripped, "line_num": i}
        current_beat["lines"].append(entry)
        raw_lines.append(entry)
        prev_type = line_type

    return beats, raw_lines


# ── Validation Checks ──

def check_beat_coverage(storyboard, episode_beats):
    """Verify all 5 required beats exist in both storyboard and episode.

    Checks performed, in order:
      1. every name in ``REQUIRED_BEATS`` is declared in the storyboard's
         ``beats`` (error) and present in the parsed episode (warning);
      2. every shot's ``beat`` is one of the storyboard's declared beats (error);
      3. every required beat has at least one shot assigned (error).

    Malformed entries (beats without a ``name``, shots without ``id``/``name``,
    ``null`` lists) are reported through the normal messages rather than
    raising ``KeyError``/``TypeError``.

    Args:
        storyboard: parsed storyboard dict with ``beats`` and ``shots`` lists.
        episode_beats: list of beat dicts as produced by ``parse_episode``.

    Returns:
        ``(errors, warnings)`` — two lists of message strings.
    """
    errors = []
    warnings = []

    shots = storyboard.get("shots") or []
    sb_beat_names = {b.get("name") for b in storyboard.get("beats") or []}
    # A beat without a name must not make shots with beat=None look valid.
    sb_beat_names.discard(None)
    ep_beat_names = {b.get("name") for b in episode_beats}

    for beat in REQUIRED_BEATS:
        if beat not in sb_beat_names:
            errors.append(f"MISSING BEAT in storyboard: {beat}")
        if beat not in ep_beat_names:
            warnings.append(f"Beat not found in episode script: {beat}")

    # Check all shots reference valid beats
    for shot in shots:
        shot_beat = shot.get("beat")
        if shot_beat not in sb_beat_names:
            sid = shot.get("id", "?")
            name = shot.get("name", "unnamed")
            errors.append(
                f"Shot #{sid} '{name}' references unknown beat: {shot_beat}"
            )

    # Check each beat has at least one shot
    shot_beats = {s.get("beat") for s in shots}
    for beat in REQUIRED_BEATS:
        if beat not in shot_beats:
            errors.append(f"No shots assigned to beat: {beat}")

    return errors, warnings


def check_script_coverage(storyboard, episode_beats):
    """Verify that every content line in the episode is covered by at least one shot.

    A line counts as covered when it appears verbatim (case-insensitively) in
    the concatenated ``script_excerpt`` text of all shots, or when at least
    40% of its words of four or more characters occur somewhere in that text.
    Lines shorter than 10 characters are never reported as uncovered.

    Overall coverage below 70% is an error, otherwise a warning; up to ten
    uncovered lines are listed individually as warnings.

    Args:
        storyboard: parsed storyboard dict; shots with a missing, ``null`` or
            non-string ``script_excerpt`` contribute no text.
        episode_beats: list of beat dicts as produced by ``parse_episode``.

    Returns:
        ``(errors, warnings)`` — two lists of message strings.
    """
    errors = []
    warnings = []

    # Keep each line paired with its beat name so uncovered lines can be
    # reported without re-scanning every beat afterwards.
    all_lines = [
        (beat.get("name", "?"), line)
        for beat in episode_beats
        for line in beat["lines"]
    ]

    if not all_lines:
        warnings.append("No content lines found in episode")
        return errors, warnings

    # Collect all script_excerpt text from shots
    shot_excerpts = []
    for shot in storyboard.get("shots") or []:
        excerpt = shot.get("script_excerpt")
        if isinstance(excerpt, str) and excerpt.strip():
            shot_excerpts.append(excerpt.strip())

    all_excerpt_text = " ".join(shot_excerpts).lower()
    # Word set for boundary-safe matching (avoids "hand" matching "handle")
    excerpt_words = set(re.findall(r"\b\w{4,}\b", all_excerpt_text))

    # Check each content line for coverage
    uncovered = []
    for beat_name, line in all_lines:
        text_lower = line["text"].strip().lower()
        # Skip very short lines (character names, etc.)
        if len(text_lower) < 10:
            continue

        # Direct substring check (most common case)
        if text_lower in all_excerpt_text:
            continue

        # Otherwise at least 40% of significant words must appear in excerpts
        words = re.findall(r"\b\w{4,}\b", text_lower)
        if words:
            matches = sum(1 for w in words if w in excerpt_words)
            if matches / len(words) >= 0.4:
                continue

        uncovered.append((beat_name, line))

    if uncovered:
        coverage_pct = ((len(all_lines) - len(uncovered)) / len(all_lines)) * 100
        if coverage_pct < 70:
            errors.append(
                f"Script coverage: {coverage_pct:.0f}% ({len(uncovered)}/{len(all_lines)} lines uncovered)"
            )
        else:
            warnings.append(
                f"Script coverage: {coverage_pct:.0f}% ({len(uncovered)} lines not in shot excerpts)"
            )

        for beat_name, line in uncovered[:10]:  # Cap at 10 for readability
            warnings.append(
                f"  Uncovered line {line['line_num']} ({beat_name}): "
                f'"{line["text"][:80]}{"..." if len(line["text"]) > 80 else ""}"'
            )
        if len(uncovered) > 10:
            warnings.append(f"  ...and {len(uncovered) - 10} more uncovered lines")

    return errors, warnings


def check_hallucination(storyboard, episode_text):
    """Check that shot content traces back to the actual script.

    For each shot whose ``script_excerpt`` is a string of at least 20
    characters, the fraction of its words (four or more characters, case
    ignored) that also occur anywhere in the episode text is computed.
    Below 30% is an error, below 50% a warning.

    Shots with a missing, ``null`` or non-string excerpt are skipped, and
    shots lacking ``id``/``name`` are reported with placeholders instead of
    raising ``KeyError``.

    Args:
        storyboard: parsed storyboard dict with a ``shots`` list.
        episode_text: raw text of the episode script.

    Returns:
        ``(errors, warnings)`` — two lists of message strings.
    """
    errors = []
    warnings = []

    # Build a word set from the episode for fast lookup
    episode_words = set(re.findall(r"\b\w{4,}\b", episode_text.lower()))

    for shot in storyboard.get("shots") or []:
        excerpt = shot.get("script_excerpt")
        if not isinstance(excerpt, str):
            continue
        excerpt = excerpt.strip()
        if len(excerpt) < 20:
            continue

        excerpt_words = re.findall(r"\b\w{4,}\b", excerpt.lower())
        if not excerpt_words:
            continue

        # Share of excerpt words (duplicates counted) found in the episode
        matches = sum(1 for w in excerpt_words if w in episode_words)
        ratio = matches / len(excerpt_words)

        prefix = f"Shot #{shot.get('id', '?')} '{shot.get('name', 'unnamed')}'"
        if ratio < 0.3:
            errors.append(
                f"{prefix}: script_excerpt has low match "
                f"to episode ({ratio:.0%}). Possible hallucination."
            )
        elif ratio < 0.5:
            warnings.append(
                f"{prefix}: script_excerpt partially matches "
                f"episode ({ratio:.0%}). May need review."
            )

    return errors, warnings


def load_schema_enums(schema_path=None):
    """Load valid enum values from the storyboard schema (single source of truth).

    Reads ``properties.shots.items.properties`` from the schema and, for each
    of ``shot_type``, ``camera_angle``, ``camera_movement`` and ``aspect``,
    takes that property's ``enum`` list. Each field is resolved independently:
    a property that is missing or malformed keeps its built-in default
    without preventing the remaining fields from being loaded.

    Args:
        schema_path: path to the schema JSON. Defaults to
            ``../templates/storyboard_schema.json`` relative to this file.

    Returns:
        Dict with keys ``shot_types``, ``camera_angles``,
        ``camera_movements`` and ``aspects``, each mapping to a set of
        allowed values. If the file is missing, unreadable or not shaped as
        expected, the built-in defaults are returned.
    """
    if schema_path is None:
        schema_path = os.path.join(
            os.path.dirname(__file__), "..", "templates", "storyboard_schema.json"
        )
    enums = {
        "shot_types": {"ECU", "CU", "MCU", "MS", "LS", "WIDE", "POV", "VFX"},
        "camera_angles": {"eye", "low", "high", "overhead", "dutch"},
        "camera_movements": {"static", "pan", "dolly", "track", "handheld", "crane"},
        "aspects": {"16:9", "9:16"},
    }
    try:
        with open(schema_path, encoding="utf-8") as f:
            schema = json.load(f)
        shot_props = schema["properties"]["shots"]["items"]["properties"]
    except (OSError, ValueError, KeyError, TypeError):
        # Missing/unreadable file, invalid JSON, or unexpected nesting.
        return enums  # Fall back to defaults
    if not isinstance(shot_props, dict):
        return enums

    # (result key, schema property name)
    fields = (
        ("shot_types", "shot_type"),
        ("camera_angles", "camera_angle"),
        ("camera_movements", "camera_movement"),
        ("aspects", "aspect"),
    )
    for key, prop_name in fields:
        prop = shot_props.get(prop_name)
        values = prop.get("enum") if isinstance(prop, dict) else None
        if not isinstance(values, (list, tuple)):
            continue  # No usable enum for this field; keep its default
        try:
            enums[key] = set(values)
        except TypeError:
            pass  # Unhashable enum members; keep the default
    return enums


def check_shot_integrity(storyboard):
    """Validate individual shot fields and structural integrity.

    Per shot: unique ``id``; presence of ``first_frame`` (error),
    ``last_frame`` and ``subject`` (warnings); ``shot_type`` (error) and
    ``camera_angle``/``camera_movement``/``aspect`` (warnings) against the
    enums from ``load_schema_enums()``; ``width``/``height`` orientation
    against ``aspect`` and divisibility by 16. Across all shots: a warning
    when every shot has the same ``shot_type``.

    ``width``/``height`` given as ``null`` are treated as unspecified, and a
    non-numeric value is reported as an error instead of raising
    ``TypeError`` during comparison.

    Args:
        storyboard: parsed storyboard dict with a ``shots`` list.

    Returns:
        ``(errors, warnings)`` — two lists of message strings.
    """
    errors = []
    warnings = []
    shots = storyboard.get("shots") or []

    if not shots:
        errors.append("Storyboard has no shots")
        return errors, warnings

    # Load enums from schema (single source of truth)
    enums = load_schema_enums()
    valid_types = enums["shot_types"]
    valid_angles = enums["camera_angles"]
    valid_movements = enums["camera_movements"]
    valid_aspects = enums["aspects"]
    # Sorted so the message is identical from run to run (set iteration order
    # of strings varies with hash randomisation).
    valid_types_label = ", ".join(sorted(str(t) for t in valid_types))

    seen_ids = set()

    for shot in shots:
        sid = shot.get("id", "?")
        name = shot.get("name", "unnamed")
        prefix = f"Shot #{sid} '{name}'"

        # Unique IDs
        if sid in seen_ids:
            errors.append(f"{prefix}: duplicate ID")
        seen_ids.add(sid)

        # Required fields
        if not shot.get("first_frame"):
            errors.append(f"{prefix}: missing first_frame prompt")
        if not shot.get("last_frame"):
            warnings.append(f"{prefix}: missing last_frame prompt")
        if not shot.get("subject"):
            warnings.append(f"{prefix}: missing subject")

        # Valid enums
        st = shot.get("shot_type", "")
        if st not in valid_types:
            errors.append(
                f"{prefix}: invalid shot_type '{st}' (valid: {valid_types_label})"
            )

        angle = shot.get("camera_angle", "")
        if angle and angle not in valid_angles:
            warnings.append(f"{prefix}: invalid camera_angle '{angle}'")

        movement = shot.get("camera_movement", "")
        if movement and movement not in valid_movements:
            warnings.append(f"{prefix}: invalid camera_movement '{movement}'")

        aspect = shot.get("aspect", "")
        if aspect and aspect not in valid_aspects:
            warnings.append(f"{prefix}: invalid aspect '{aspect}'")

        # Normalise dimensions: missing/null means unspecified (0); anything
        # that is not a real number is reported and excluded from the checks.
        dims = {}
        dims_valid = True
        for dim in ("width", "height"):
            value = shot.get(dim)
            if value is None:
                value = 0
            elif isinstance(value, bool) or not isinstance(value, (int, float)):
                errors.append(f"{prefix}: {dim} {value!r} is not a number")
                dims_valid = False
                value = 0
            dims[dim] = value
        w = dims["width"]
        h = dims["height"]

        # Dimension/aspect consistency (only meaningful if both are numeric)
        if dims_valid:
            if aspect == "16:9" and h > w:
                errors.append(f"{prefix}: aspect 16:9 but height ({h}) > width ({w})")
            if aspect == "9:16" and w > h:
                errors.append(f"{prefix}: aspect 9:16 but width ({w}) > height ({h})")

        # Multiple-of-16 check (Flux 2 requirement)
        if w > 0 and w % 16 != 0:
            errors.append(f"{prefix}: width {w} is not a multiple of 16 (Flux 2 requirement)")
        if h > 0 and h % 16 != 0:
            errors.append(f"{prefix}: height {h} is not a multiple of 16 (Flux 2 requirement)")

    # Flag if all shots are the same type
    distinct_types = {shot.get("shot_type", "?") for shot in shots}
    if len(distinct_types) == 1:
        warnings.append(
            f"All {len(shots)} shots are {next(iter(distinct_types))} — consider variety"
        )

    # Shot count check moved to check_shot_grammar() with configurable target

    return errors, warnings


def _check_180_rule(scene_shots, warnings):
    """Check 180° rule within a scene: camera_side should not flip without motivation."""
    prev_side = None
    prev_shot_type = None
    for shot in scene_shots:
        spatial = shot.get("spatial") or {}
        side = spatial.get("camera_side")
        shot_type = shot.get("shot_type", "")
        if not side:
            prev_shot_type = shot_type
            continue
        if prev_side and side != prev_side:
            # POV shots are ON the line — exempt from crossing warnings
            if shot_type == "POV" or prev_shot_type == "POV":
                prev_side = side
                prev_shot_type = shot_type
                continue
            warnings.append(
                f"GRAMMAR: 180° rule — shot #{shot['id']} crosses the line "
                f"(camera side {prev_side}→{side}) within a scene. "
                f"Deliberate line-crossing is valid but should be motivated "
                f"(dolly cross, establishing shot reset, or power shift)."
            )
        prev_side = side
        prev_shot_type = shot_type


def _check_screen_direction(scene_shots, warnings):
    """Check screen direction doesn't reverse without an establishing shot between."""
    prev_dir = None
    prev_id = None
    for shot in scene_shots:
        spatial = shot.get("spatial") or {}
        direction = spatial.get("screen_direction")
        if not direction:
            continue
        # Reset tracking after establishing shots
        if shot.get("shot_type") in ("WIDE", "LS"):
            prev_dir = direction
            prev_id = shot["id"]
            continue
        if prev_dir and direction != prev_dir:
            # Check if they're opposite directions
            opposites = {
                "left-to-right": "right-to-left",
                "right-to-left": "left-to-right",
                "toward-camera": "away-from-camera",
                "away-from-camera": "toward-camera",
            }
            if opposites.get(prev_dir) == direction:
                warnings.append(
                    f"GRAMMAR: Screen direction reversal — shot #{prev_id}→#{shot['id']} "
                    f"({prev_dir}→{direction}) without establishing shot between. "
                    f"Add WIDE/LS to reset spatial context, or ensure reversal is story-motivated."
                )
        prev_dir = direction
        prev_id = shot["id"]


def check_shot_grammar(storyboard, project_path=None):
    """Check adjacent shot pairs for edit-level problems.

    Enforces universal film grammar regardless of genre:
    - Scale variety (no monotonous sequences)
    - Establishing shots at scene boundaries
    - Jump cut detection
    - Shot rhythm across sequences
    - Scene coverage (tight + medium + wide)
    """
    errors = []
    warnings = []
    shots = storyboard.get("shots", [])

    if len(shots) < 2:
        return errors, warnings

    # ── Check 1: Scale variety (no 3+ consecutive same shot_type) ──
    consecutive = 1
    for i in range(1, len(shots)):
        curr_type = shots[i].get("shot_type", "")
        prev_type = shots[i - 1].get("shot_type", "")
        if curr_type in SPECIAL_TYPES or prev_type in SPECIAL_TYPES:
            consecutive = 1
            continue
        if curr_type == prev_type:
            consecutive += 1
            if consecutive >= 3:
                start_idx = i - consecutive + 1
                errors.append(
                    f"GRAMMAR: {consecutive} consecutive {curr_type} shots "
                    f"(#{shots[start_idx]['id']}-#{shots[i]['id']}). "
                    f"Vary shot scale to create visual rhythm."
                )
        else:
            consecutive = 1

    # ── Check 2: Establishing shots at scene boundaries ──
    for shot in shots:
        if shot.get("scene_break_before", False):
            st = shot.get("shot_type", "")
            if st not in WIDE_SCALES and st not in SPECIAL_TYPES:
                warnings.append(
                    f"GRAMMAR: Shot #{shot['id']} '{shot['name']}' is a scene boundary "
                    f"but opens with {st}. Consider establishing with WIDE or LS first."
                )

    # ── Check 3: Jump cut risk ──
    for i in range(1, len(shots)):
        curr = shots[i]
        prev = shots[i - 1]
        curr_type = curr.get("shot_type", "")
        prev_type = prev.get("shot_type", "")
        curr_angle = curr.get("camera_angle", "eye")
        prev_angle = prev.get("camera_angle", "eye")

        if curr_type in SPECIAL_TYPES or prev_type in SPECIAL_TYPES:
            continue

        # Same characters in both shots?
        curr_chars = set(curr.get("characters_in_shot", []))
        prev_chars = set(prev.get("characters_in_shot", []))
        shared_chars = curr_chars & prev_chars

        if not shared_chars:
            continue

        # Exact match: same subject, scale, angle → hard fail
        if curr_type == prev_type and curr_angle == prev_angle:
            errors.append(
                f"GRAMMAR: Jump cut — shots #{prev['id']}-#{curr['id']} "
                f"have same subject ({', '.join(sorted(shared_chars))}), "
                f"scale ({curr_type}), and angle ({curr_angle}). "
                f"Change scale or angle between shots."
            )
        # Adjacent scales + same angle → soft warning
        elif curr_angle == prev_angle:
            curr_order = SCALE_ORDER.get(curr_type, -1)
            prev_order = SCALE_ORDER.get(prev_type, -1)
            if curr_order >= 0 and prev_order >= 0 and abs(curr_order - prev_order) <= 1:
                warnings.append(
                    f"GRAMMAR: Possible jump cut — shots #{prev['id']}-#{curr['id']} "
                    f"have same subject, similar scale ({prev_type}→{curr_type}), "
                    f"same angle ({curr_angle}). Consider a wider scale change."
                )

    # ── Check 4: Scale rhythm (5+ shots with no scale variety) ──
    for i in range(len(shots) - 4):
        window = shots[i : i + 5]
        types = [
            s.get("shot_type", "")
            for s in window
            if s.get("shot_type", "") not in SPECIAL_TYPES
        ]
        if len(types) < 5:
            continue
        if all(t in TIGHT_SCALES for t in types):
            warnings.append(
                f"GRAMMAR: 5 consecutive tight shots "
                f"(#{window[0]['id']}-#{window[4]['id']}). "
                f"Add a wider shot for visual breathing room."
            )
        elif all(t in WIDE_SCALES for t in types):
            warnings.append(
                f"GRAMMAR: 5 consecutive wide shots "
                f"(#{window[0]['id']}-#{window[4]['id']}). "
                f"Go tight to establish intimacy."
            )

    # ── Check 5: Scene coverage ──
    scenes = []
    current_scene_shots = []
    for shot in shots:
        if shot.get("scene_break_before", False) and current_scene_shots:
            scenes.append(current_scene_shots)
            current_scene_shots = []
        current_scene_shots.append(shot)
    if current_scene_shots:
        scenes.append(current_scene_shots)

    for scene_shots in scenes:
        if len(scene_shots) < 4:
            continue  # Too few shots to expect full coverage
        scene_types = set(s.get("shot_type", "") for s in scene_shots)
        has_tight = bool(scene_types & TIGHT_SCALES)
        has_wide = bool(scene_types & WIDE_SCALES)

        first_id = scene_shots[0]["id"]
        last_id = scene_shots[-1]["id"]

        if not has_tight:
            warnings.append(
                f"GRAMMAR: Scene (shots #{first_id}-#{last_id}) has no close shots. "
                f"Add CU/ECU for emotional connection."
            )
        if not has_wide:
            warnings.append(
                f"GRAMMAR: Scene (shots #{first_id}-#{last_id}) has no establishing shots. "
                f"Add WIDE/LS for spatial context."
            )

    # ── Check 6: Scale Change Minimum (axis/scale rule from appendix_g) ──
    # Supersedes Check 3's exact-match detection with a more nuanced rule:
    # consecutive shots with shared characters need 2+ SCALE_ORDER steps OR different angle
    for i in range(1, len(shots)):
        curr = shots[i]
        prev = shots[i - 1]
        curr_type = curr.get("shot_type", "")
        prev_type = prev.get("shot_type", "")
        curr_angle = curr.get("camera_angle", "eye")
        prev_angle = prev.get("camera_angle", "eye")

        if curr_type in SPECIAL_TYPES or prev_type in SPECIAL_TYPES:
            continue

        # Only applies to shots with shared characters
        curr_chars = set(curr.get("characters_in_shot", []))
        prev_chars = set(prev.get("characters_in_shot", []))
        shared_chars = curr_chars & prev_chars

        if not shared_chars:
            continue

        # Skip if already caught by Check 3 (exact match)
        if curr_type == prev_type and curr_angle == prev_angle:
            continue

        # Calculate scale distance
        curr_order = SCALE_ORDER.get(curr_type, -1)
        prev_order = SCALE_ORDER.get(prev_type, -1)

        if curr_order < 0 or prev_order < 0:
            continue

        scale_distance = abs(curr_order - prev_order)

        # If scale distance < 2 AND same angle → too similar
        if scale_distance < 2 and curr_angle == prev_angle:
            errors.append(
                f"GRAMMAR: Scale change too small — shots #{prev['id']}-#{curr['id']} "
                f"share subject ({', '.join(sorted(shared_chars))}), "
                f"scale change {prev_type}→{curr_type} ({scale_distance} step), "
                f"same angle ({curr_angle}). Need 2+ scale steps or different angle. "
                f"(ref: appendix_g §B, CONSTANTS.md → SCALE_CHANGE_MIN_STEPS)"
            )

    # ── Check 7: ECU Minimum ──
    ecu_count = sum(1 for s in shots if s.get("shot_type") == "ECU")
    if ecu_count < 2:
        warnings.append(
            f"GRAMMAR: Only {ecu_count} ECU shot(s) — vertical format benefits from "
            f"at least 2 extreme close-ups per episode (eyes, hands, objects). "
            f"(ref: appendix_g §A, CONSTANTS.md → ECU_MIN_PER_EPISODE)"
        )

    # ── Check 8: Shot Scale Distribution ──
    total = len(shots)
    if total >= 10:  # Only check distribution with enough shots
        type_counts_grammar = {}
        for s in shots:
            t = s.get("shot_type", "")
            if t not in SPECIAL_TYPES:
                type_counts_grammar[t] = type_counts_grammar.get(t, 0) + 1

        non_special_total = sum(type_counts_grammar.values())
        if non_special_total > 0:
            # Target ranges from appendix_g §A
            targets = {
                "ECU": (10, 15),
                "CU": (20, 30),
                "MCU": (25, 35),
                "MS": (15, 25),
                "LS": (5, 15),
                "WIDE": (5, 15),
            }
            # Combine LS and WIDE for the wide category
            wide_count = type_counts_grammar.get("LS", 0) + type_counts_grammar.get("WIDE", 0)
            dist = {
                "ECU": type_counts_grammar.get("ECU", 0),
                "CU": type_counts_grammar.get("CU", 0),
                "MCU": type_counts_grammar.get("MCU", 0),
                "MS": type_counts_grammar.get("MS", 0),
                "WIDE/LS": wide_count,
            }
            target_map = {
                "ECU": (10, 15),
                "CU": (20, 30),
                "MCU": (25, 35),
                "MS": (15, 25),
                "WIDE/LS": (5, 15),
            }
            tolerance = 15  # percentage points

            out_of_range = []
            for scale_name, count in dist.items():
                pct = (count / non_special_total) * 100
                lo, hi = target_map[scale_name]
                if pct < lo - tolerance or pct > hi + tolerance:
                    out_of_range.append(
                        f"{scale_name}: {pct:.0f}% (target {lo}-{hi}%)"
                    )

            if out_of_range:
                warnings.append(
                    f"GRAMMAR: Shot scale distribution outside target range — "
                    f"{'; '.join(out_of_range)}. "
                    f"(ref: appendix_g §A, informational — trial and error)"
                )

    # ── Check 9: Action Beat ECU Presence ──
    action_beats = {"THE ESCALATION", "THE TURN"}
    for beat_name in action_beats:
        beat_shots = [s for s in shots if s.get("beat") == beat_name]
        if len(beat_shots) >= 3:  # Only check beats with enough shots
            has_ecu_or_cu = any(
                s.get("shot_type") in {"ECU", "CU"} for s in beat_shots
            )
            if not has_ecu_or_cu:
                warnings.append(
                    f"GRAMMAR: {beat_name} has {len(beat_shots)} shots but no ECU or CU. "
                    f"Action beats benefit from ECU/CU punch-ins for emotional impact. "
                    f"(ref: appendix_g §C, §D)"
                )

    # ── Check 10: 180° Rule (camera_side consistency within scenes) ──
    current_scene_shots_spatial = []
    for shot in shots:
        if shot.get("scene_break_before", False) and current_scene_shots_spatial:
            # Check the completed scene
            _check_180_rule(current_scene_shots_spatial, warnings)
            current_scene_shots_spatial = []
        current_scene_shots_spatial.append(shot)
    if current_scene_shots_spatial:
        _check_180_rule(current_scene_shots_spatial, warnings)

    # ── Check 11: Screen Direction Consistency ──
    current_scene_shots_dir = []
    for shot in shots:
        if shot.get("scene_break_before", False) and current_scene_shots_dir:
            _check_screen_direction(current_scene_shots_dir, warnings)
            current_scene_shots_dir = []
        current_scene_shots_dir.append(shot)
    if current_scene_shots_dir:
        _check_screen_direction(current_scene_shots_dir, warnings)

    # ── Check 12: 30° Angle Minimum (strengthen Check 6) ──
    for i in range(1, len(shots)):
        curr = shots[i]
        prev = shots[i - 1]
        curr_type = curr.get("shot_type", "")
        prev_type = prev.get("shot_type", "")

        if curr_type in SPECIAL_TYPES or prev_type in SPECIAL_TYPES:
            continue

        curr_chars = set(curr.get("characters_in_shot", []))
        prev_chars = set(prev.get("characters_in_shot", []))
        if not (curr_chars & prev_chars):
            continue

        curr_spatial = curr.get("spatial") or {}
        prev_spatial = prev.get("spatial") or {}
        curr_angle = curr.get("camera_angle", "eye")
        prev_angle = prev.get("camera_angle", "eye")
        curr_side = curr_spatial.get("camera_side", "")
        prev_side = prev_spatial.get("camera_side", "")

        curr_order = SCALE_ORDER.get(curr_type, -1)
        prev_order = SCALE_ORDER.get(prev_type, -1)

        if curr_order >= 0 and prev_order >= 0:
            scale_distance = abs(curr_order - prev_order)
            # Same angle AND same camera_side AND only 1 step scale change
            if (curr_angle == prev_angle
                    and curr_side == prev_side and curr_side
                    and scale_distance == 1):
                warnings.append(
                    f"GRAMMAR: Possible jump cut (30° rule) — shots #{prev['id']}-#{curr['id']} "
                    f"share subject, same angle ({curr_angle}), same camera side ({curr_side}), "
                    f"only {scale_distance} scale step ({prev_type}→{curr_type}). "
                    f"Consider a 30° axis shift."
                )

    # ── Check 13: Spatial Completeness ──
    scenes_for_completeness = []
    scene_buf = []
    for shot in shots:
        if shot.get("scene_break_before", False) and scene_buf:
            scenes_for_completeness.append(scene_buf)
            scene_buf = []
        scene_buf.append(shot)
    if scene_buf:
        scenes_for_completeness.append(scene_buf)

    for scene_shots_c in scenes_for_completeness:
        # Find scenes with any characters
        all_chars_in_scene = set()
        for s in scene_shots_c:
            all_chars_in_scene.update(s.get("characters_in_shot", []))
        if not all_chars_in_scene:
            continue

        has_spatial = any(s.get("spatial") for s in scene_shots_c)
        if not has_spatial:
            first_id = scene_shots_c[0]["id"]
            last_id = scene_shots_c[-1]["id"]
            warnings.append(
                f"GRAMMAR: Scene (shots #{first_id}-#{last_id}) has characters "
                f"({', '.join(sorted(all_chars_in_scene))}) but no spatial data on any shot. "
                f"Add spatial.camera_side and spatial.blocking for 180° rule enforcement. "
                f"The line of action exists even for solo characters (between character and engagement object)."
            )

    # Check edge_continuity populated on angle-change cuts
    for i in range(1, len(shots)):
        curr = shots[i]
        prev = shots[i - 1]
        if curr.get("scene_break_before", False):
            continue
        curr_angle = curr.get("camera_angle", "eye")
        prev_angle = prev.get("camera_angle", "eye")
        if curr_angle != prev_angle and not curr.get("edge_continuity"):
            # Only warn if both have characters (not ENV shots)
            if curr.get("characters_in_shot") and prev.get("characters_in_shot"):
                warnings.append(
                    f"GRAMMAR: Shot #{curr['id']} changes angle ({prev_angle}→{curr_angle}) "
                    f"but has no edge_continuity. Add spatial_note for cut boundary context."
                )

    # ── Configurable shot count (replaces hardcoded check in check_shot_integrity) ──
    shot_min, shot_max = _load_shot_count_target(project_path)
    shot_count = len(shots)
    low_threshold = max(shot_min - 4, 8)  # Generous lower bound
    high_threshold = shot_max + 6
    if shot_count < low_threshold:
        warnings.append(
            f"GRAMMAR: Only {shot_count} shots — target is {shot_min}-{shot_max} "
            f"(configurable via project_config.json → shots_per_episode)"
        )
    elif shot_count > high_threshold:
        warnings.append(
            f"GRAMMAR: {shot_count} shots — unusually high "
            f"(target {shot_min}-{shot_max}), may want to consolidate"
        )

    return errors, warnings


def check_beat_shot_distribution(storyboard):
    """Flag beats whose shot count looks out of proportion to their runtime.

    Shots are tallied by their "beat" field ("?" when the field is absent).
    Two heuristics are applied, both of which only produce warnings:

      * THE HOOK with more than 5 shots.
      * THE ESCALATION or THE TURN with fewer than 2 shots.

    A beat that has no shots at all never enters the tally, so it is not
    reported by this check.

    Returns:
        (errors, warnings) — errors is always an empty list for this check.
    """
    # Rough timing the heuristics are based on:
    # HOOK ~5s, SETUP ~10s, ESCALATION ~25s, TURN ~30s, CLIFFHANGER ~20s
    long_beats = ("THE ESCALATION", "THE TURN")

    # Count shots per beat; dict insertion order keeps beats in first-seen
    # order, which is also the order the warnings are emitted in.
    tally = {}
    for entry in storyboard.get("shots", []):
        label = entry.get("beat", "?")
        tally[label] = tally.get(label, 0) + 1

    notes = []
    for label, total in tally.items():
        if label == "THE HOOK" and total > 5:
            notes.append(
                f"{label}: {total} shots seems high for a 5-second beat"
            )
        if label in long_beats and total < 2:
            notes.append(
                f"{label}: only {total} shot(s) — this is the longest beat, "
                "consider more coverage"
            )

    return [], notes


# ── Main ──

def validate(storyboard_path, episode_path, project_path=None):
    """Run all validation checks on one storyboard/episode pair.

    Args:
        storyboard_path: Path to the storyboard JSON file.
        episode_path: Path to the episode script text file.
        project_path: Optional project folder, forwarded to
            check_shot_grammar. When falsy, the parent of the storyboard's
            directory is used if it contains a "bible" sub-directory;
            otherwise the value is left as given.

    Returns:
        (is_valid, errors, warnings, stats). is_valid is True only when no
        errors were collected; stats is a dict of summary counters.

    Raises:
        OSError: if the storyboard or episode file cannot be opened.
        json.JSONDecodeError: if the storyboard is not valid JSON.
        UnicodeDecodeError: if the storyboard or episode is not valid UTF-8.
    """
    # Read inputs as UTF-8 explicitly. Without an encoding argument, open()
    # uses the locale's preferred encoding, which mis-decodes non-ASCII
    # script text on platforms whose default is not UTF-8.
    with open(storyboard_path, encoding="utf-8") as f:
        storyboard = json.load(f)

    with open(episode_path, encoding="utf-8") as f:
        episode_text = f.read()

    episode_beats, raw_lines = parse_episode(episode_text)

    # Infer project_path if not provided
    if not project_path:
        # Storyboard lives in [project]/storyboards/ — the project root is
        # the parent of the storyboard's directory. Only accept it when it
        # looks like a project (has a "bible" folder).
        sb_dir = os.path.dirname(os.path.abspath(storyboard_path))
        candidate = os.path.dirname(sb_dir)
        if os.path.isdir(os.path.join(candidate, "bible")):
            project_path = candidate

    all_errors = []
    all_warnings = []

    # Version check: load expected version from schema, compare against
    # storyboard. The schema is resolved relative to this script's location.
    schema_path = os.path.join(os.path.dirname(__file__), "..", "templates", "storyboard_schema.json")
    try:
        with open(schema_path, encoding="utf-8") as f:
            schema = json.load(f)
    except (OSError, ValueError):
        # Best-effort check: any failure to read or decode the schema
        # (missing, unreadable, a directory, bad encoding, bad JSON) is
        # downgraded to a warning rather than aborting validation.
        all_warnings.append("Could not load storyboard_schema.json for version check")
    else:
        expected_version = schema.get("properties", {}).get("version", {}).get("const")
        sb_version = storyboard.get("version")
        if expected_version is not None:
            if sb_version is None:
                all_errors.append(f"Storyboard missing 'version' field (expected {expected_version})")
            elif sb_version != expected_version:
                all_errors.append(f"Storyboard version {sb_version} does not match schema version {expected_version}")

    # Run checks — each returns an (errors, warnings) pair.
    for check_fn, args in [
        (check_beat_coverage, (storyboard, episode_beats)),
        (check_script_coverage, (storyboard, episode_beats)),
        (check_hallucination, (storyboard, episode_text)),
        (check_shot_integrity, (storyboard,)),
        (check_shot_grammar, (storyboard, project_path)),
        (check_beat_shot_distribution, (storyboard,)),
    ]:
        errs, warns = check_fn(*args)
        all_errors.extend(errs)
        all_warnings.extend(warns)

    shots = storyboard.get("shots", [])

    # Prose Spec v1.0 check (warnings only — does not block validation)
    prose_warnings = check_prose_spec(shots)
    all_warnings.extend(prose_warnings)

    # Stats: tally shot types and beats in a single pass ("?" when absent).
    type_counts = {}
    beat_counts = {}
    for s in shots:
        t = s.get("shot_type", "?")
        type_counts[t] = type_counts.get(t, 0) + 1
        b = s.get("beat", "?")
        beat_counts[b] = beat_counts.get(b, 0) + 1

    prompts_filled = sum(1 for s in shots if s.get("first_frame"))
    motion_filled = sum(1 for s in shots if s.get("motion_prompt"))

    # Grammar-specific stats: grammar messages are tagged with this prefix.
    grammar_errors = [e for e in all_errors if e.startswith("GRAMMAR:")]
    grammar_warnings = [w for w in all_warnings if w.startswith("GRAMMAR:")]

    stats = {
        "total_shots": len(shots),
        "episode_content_lines": len(raw_lines),
        "episode_beats": len(episode_beats),
        "shot_types": type_counts,
        "shots_per_beat": beat_counts,
        "prompts_filled": prompts_filled,
        "motion_prompts_filled": motion_filled,
        "grammar_errors": len(grammar_errors),
        "grammar_warnings": len(grammar_warnings),
        "prose_warnings": len(prose_warnings),
    }

    # Warnings never invalidate a storyboard; only errors do.
    is_valid = len(all_errors) == 0
    return is_valid, all_errors, all_warnings, stats


def main():
    """Command-line entry point for the storyboard validator.

    Parses the arguments, runs validate(), and prints the outcome in one of
    three formats: JSON (--json), fix instructions (--prompt), or a
    human-readable report (default). When both --json and --prompt are
    given, --json takes precedence.

    Exits with 0 when the storyboard is valid, 1 when validation errors were
    found, and 2 when an input file is missing or validate() raised.
    """
    cli = argparse.ArgumentParser(
        description="Validate storyboard JSON against episode script"
    )
    cli.add_argument("storyboard", help="Path to storyboard JSON")
    cli.add_argument("episode", help="Path to episode .md file")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument(
        "--prompt",
        action="store_true",
        help="Output fix instructions for Claude",
    )
    cli.add_argument(
        "--project",
        help="Path to project folder (for configurable shot count target)",
    )
    opts = cli.parse_args()

    # Fail fast (exit 2) on the first missing input, storyboard first.
    required_inputs = (
        ("Storyboard", opts.storyboard),
        ("Episode", opts.episode),
    )
    for label, path in required_inputs:
        if os.path.exists(path):
            continue
        print(f"ERROR: {label} not found: {path}")
        sys.exit(2)

    # Any failure inside validate() is reported as a file/parse error (exit 2).
    try:
        is_valid, errors, warnings, stats = validate(
            opts.storyboard, opts.episode, project_path=opts.project
        )
    except json.JSONDecodeError as exc:
        print(f"ERROR: Invalid JSON in storyboard: {exc}")
        sys.exit(2)
    except Exception as exc:
        print(f"ERROR: {exc}")
        sys.exit(2)

    if opts.json:
        report = {
            "is_valid": is_valid,
            "errors": errors,
            "warnings": warnings,
            "stats": stats,
        }
        print(json.dumps(report, indent=2))

    elif opts.prompt:
        if not is_valid or warnings:
            print("FIX INSTRUCTIONS FOR STORYBOARD:")
            print()
            for heading, messages in (
                ("ERRORS (must fix):", errors),
                ("WARNINGS (should review):", warnings),
            ):
                if not messages:
                    continue
                print(heading)
                for msg in messages:
                    print(f"  - {msg}")
                print()

            # Each hint is printed when at least one *error* message matches
            # its predicate. Step numbers are fixed, so the printed list may
            # have gaps when some steps do not apply.
            step_rules = (
                (
                    lambda m: "MISSING BEAT" in m,
                    "  1. Add missing beats to the beats array",
                ),
                (
                    # NOTE(review): this template mentions [cinematic], a word
                    # the module's _PROSE_BANNED_PATTERNS bans — confirm the
                    # hint is still current.
                    lambda m: "missing first_frame" in m,
                    "  2. Fill first_frame prompts for all shots using: "
                    "[shot_type] of [subject], [action], [location], [cinematic]",
                ),
                (
                    lambda m: "coverage" in m.lower(),
                    "  3. Add shots to cover uncovered script lines. "
                    "Map each action/dialogue block to at least one shot.",
                ),
                (
                    lambda m: "hallucination" in m.lower(),
                    "  4. Fix script_excerpt fields — they must contain actual text "
                    "from the episode, not invented content.",
                ),
                (
                    # NOTE(review): 768x1024 is a 3:4 ratio, not 9:16 — verify
                    # against the dimension check before changing this text.
                    lambda m: "dimension" in m.lower() or "aspect" in m.lower(),
                    "  5. Fix dimension/aspect mismatches: "
                    "16:9 = 1024x576, 9:16 = 768x1024",
                ),
            )
            print("STEPS:")
            for applies, instruction in step_rules:
                if any(applies(msg) for msg in errors):
                    print(instruction)

            # Grammar issues are collected from errors and warnings alike.
            grammar_msgs = [m for m in errors + warnings if "GRAMMAR:" in m]
            if grammar_msgs:
                print(
                    "  6. SHOT GRAMMAR fixes:"
                )
                for msg in grammar_msgs:
                    # Strip the "GRAMMAR: " tag; the rest is shown verbatim.
                    untagged = msg.replace('GRAMMAR: ', '')
                    print(f"     - {untagged}")
                print()
                for line in (
                    "  Grammar rules: vary scale between adjacent shots, establish scenes",
                    "  with wide shots, avoid jump cuts (same subject+scale+angle).",
                    "  Shot count target is configurable via project_config.json → shots_per_episode.",
                ):
                    print(line)
        else:
            print("Storyboard is valid. No changes needed.")

    else:
        # Human-readable output
        total_shots = stats["total_shots"]
        summary_lines = (
            "=== Storyboard Validation ===",
            f"Storyboard: {os.path.basename(opts.storyboard)}",
            f"Episode:    {os.path.basename(opts.episode)}",
            "",
            f"Shots: {total_shots}",
            f"Episode lines: {stats['episode_content_lines']}",
            f"Prompts filled: {stats['prompts_filled']}/{total_shots}",
            f"Motion prompts: {stats['motion_prompts_filled']}/{total_shots}",
            "",
        )
        for line in summary_lines:
            print(line)

        # Per-type and per-beat breakdowns, sorted by key; skipped when empty.
        for caption, stat_key in (
            ("Shot types", "shot_types"),
            ("Per beat", "shots_per_beat"),
        ):
            counts = stats[stat_key]
            if counts:
                pairs = [f"{name}: {n}" for name, n in sorted(counts.items())]
                print(f"{caption}: {', '.join(pairs)}")
        print()

        for caption, bullet, messages in (
            ("ERRORS", "✗", errors),
            ("WARNINGS", "!", warnings),
        ):
            if messages:
                print(f"{caption} ({len(messages)}):")
                for msg in messages:
                    print(f"  {bullet} {msg}")
                print()

        print(
            "RESULT: VALID"
            if is_valid
            else "RESULT: INVALID — fix errors before proceeding"
        )

    sys.exit(0 if is_valid else 1)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
