"""
ingest_pipeline.py — Four-stage render extraction pipeline.

Extracts all visual production data from completed screenplays.

Stage 0: Camera-Test Pass (Gemini 2.5 Flash)
  - Breaks raw screenplay into shot-boundary text blocks
  - Enforces 28-41 shot budget per episode
  - Output: projects/{project}/state/starsend/camera_tested/ep_NNN.json

Stage 1: Breakdown Pass / Global Bible (Gemini 3.1 Pro)
  - Synthesizes characters with phases, locations, props, lighting motifs
  - Input: All camera-tested JSONs + character bible + project config
  - Output: projects/{project}/state/starsend/global_bible.json

Stage 1.5: Opus Enrichment (Claude Opus 4.6)
  - Fills [OPUS_ENRICHMENT] placeholders in the global bible
  - Character visual descriptions, location atmospheres, wardrobe direction
  - Groups fields by entity for coherent per-entity enrichment
  - Optional: auto-skipped if ANTHROPIC_API_KEY not set
  - Output: projects/{project}/state/starsend/global_bible.json (enriched in-place)

Stage 2: Storyboard Pass (Gemini 2.5 Flash)
  - 1:1 mapping of camera-tested text blocks → render-ready shot records
  - Input: 1 camera-tested JSON + global bible
  - Output: projects/{project}/state/starsend/plans/ep_NNN_plan.json

Usage:
    from orchestrator.ingest_pipeline import IngestPipeline

    pipeline = IngestPipeline(project="leviathan", project_root=Path("../projects/leviathan"))
    pipeline.run_camera_test(episode_num=1)
    pipeline.run_breakdown_pass()
    pipeline.run_opus_enrichment()           # Stage 1.5 standalone
    pipeline.run_storyboard_pass(episode_num=1)
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
import re
import sys
import time
from datetime import datetime
from pathlib import Path

# Ensure project root is on path for imports
_PROJECT_ROOT = Path(__file__).parent.parent
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))

from lib.model_profiles import get_model
from lib.render_schema import (
    CameraTestedEpisode,
    EpisodePlan,
    GlobalBible,
    validate_camera_test_budget,
    validate_plan_against_bible,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Repository root: two directory levels above this file (the same value as
# _PROJECT_ROOT, which is computed earlier for the sys.path bootstrap).
STARSEND_ROOT = Path(__file__).parent.parent

# Legacy data dirs (kept for reference; pipeline now uses per-project paths)
DATA_DIR = STARSEND_ROOT / "data"

# Default extraction model — Opus 4.6 per ADR H07.
# Gemini reserved as configurable fallback.
# Resolved once at import time and used as the default for
# IngestPipeline(extraction_model=...).
DEFAULT_EXTRACTION_MODEL = get_model("extraction", "text")

# Gemini fallback models (used when extraction_model is a Gemini variant)
# NOTE(review): the module docstring names "Gemini 2.5 Flash" for Stages 0 and
# 2, but all three constants resolve the "pro" text profile — confirm which
# one is intended.
CAMERA_TEST_MODEL = get_model("pro", "text")
BREAKDOWN_MODEL = get_model("pro", "text")
STORYBOARD_MODEL = get_model("pro", "text")

# Number of corrective re-prompts after the first attempt. The retry loops
# iterate range(MAX_RETRIES + 1), so each stage makes at most
# MAX_RETRIES + 1 model calls.
MAX_RETRIES = 2
# Default for IngestPipeline(batch_size=...).
DEFAULT_BATCH_SIZE = 5

# Opus enrichment model + pricing
OPUS_ENRICHMENT_MODEL = "claude-opus-4-6"
OPUS_INPUT_COST_PER_M = 15.0   # $/M tokens
OPUS_OUTPUT_COST_PER_M = 75.0  # $/M tokens

# Field-type-dependent format instructions for Opus enrichment
FORMAT_INSTRUCTIONS = {
    "visual_description": (
        "Output a dense comma-separated list of concrete physical descriptors. "
        "FACE AND BODY ONLY — no wardrobe, no accessories, no narrative prose, no metaphors. "
        "Be maximally specific: exact age range, build type, skin tone and undertone, "
        "hair texture/color/length/style, facial bone structure, eye color/shape/spacing, "
        "nose shape, mouth shape, distinguishing physical traits. "
        "Every token must be renderable by an image generation model. "
        "Example: 'late 20s, lean angular build, warm olive skin, dark brown hair cropped "
        "short with grey at temples, deep-set hazel eyes with permanent fatigue shadows, "
        "sharp cheekbones, thin deliberate mouth, prominent collarbones'"
    ),
    "atmosphere": (
        "Write 2-3 sentences of evocative cinematic prose describing the mood, "
        "light quality, sound texture, temperature, and emotional feel of this location. "
        "Write for a cinematographer scouting the space, not a novelist. "
        "Be sensory-specific: name the light source, the color temperature, "
        "what you hear when the dialogue stops, what the air feels like."
    ),
    "wardrobe_arc_thesis": (
        "Write 2-3 sentences explaining the dramatic logic of this character's wardrobe "
        "evolution across the series. What does their clothing reveal about their "
        "psychological state at each turning point? Reference specific phase transitions."
    ),
    "wardrobe_arc_vision": (
        "Write 2-3 sentences of visual direction for this character's wardrobe. "
        "Reference specific textures, color palettes, silhouettes, and real-world "
        "style influences. A costume designer should be able to pull a rack from this."
    ),
    "wardrobe_philosophy": (
        "Write 2-3 sentences of project-level wardrobe direction. "
        "What is the unifying visual language across all characters? "
        "What does clothing mean in this world? How does wardrobe serve the story's themes?"
    ),
}


# ---------------------------------------------------------------------------
# Prompts
# ---------------------------------------------------------------------------

CAMERA_TEST_SYSTEM_PROMPT = """\
You are the Lead Editor for a vertical microdrama. Your job is to perform a \
"Camera-Test" on a raw screenplay, breaking it down into exact, individual \
camera shots.

RULES:
1. ONE SHOT = ONE ARRAY ITEM.
2. SPLIT SHOTS WHEN: The subject changes, the camera scale changes \
(e.g., Wide to Close-Up), the location changes, screen direction reverses, \
or there is a time jump.
3. SUSPENSE FRAGMENTATION: In high-tension moments, single sentences or \
fragments SHOULD be their own shots (e.g., punch-ins, ticking clocks, \
sudden reactions).
4. KEEP TOGETHER: Dialogue and the action describing that dialogue belong \
in the same shot. Verbless fragments continuing the same visual beat stay \
together.
5. SHOT BUDGET: You MUST output between {min_shots} and {max_shots} shots \
for this episode. If there is too much action, consolidate minor beats \
into single continuous tracking shots. If there is too little, punch in \
for reaction shots.
6. CHARACTER DETECTION: List all character names that appear or are \
referenced in each shot's text.
7. LOCATION HINTS: If a scene heading (INT./EXT.) precedes a shot, \
include it as location_hint.
8. DIALOGUE DETECTION: Mark has_dialogue=true if the text contains \
character dialogue (a character name followed by their spoken line).
9. SCENE INDEX: Assign a scene_index (starting at 1) to each shot based \
on NARRATIVE scene boundaries. A new scene starts when: the dramatic beat \
changes, a new conversation topic begins, there's a tonal shift, or a new \
confrontation/interaction begins. Do NOT rely solely on location changes — \
multiple scenes can happen in one location (e.g., a conversation and then \
a fight in the same room are separate scenes). Consecutive shots in the \
same dramatic beat share the same scene_index."""

BREAKDOWN_SYSTEM_PROMPT = """\
You are the Structural Synthesis Engine for Starsend, a cinematic rendering engine.
You are performing PASS 1 (Structural Extraction) of a 2-pass architecture \
for the project "{project_name}".

INPUTS:
1. A camera-tested screenplay containing {total_episodes} episodes.
2. The author's official Project Metadata (Character Bibles, Project Config).
3. [IF MERGING] An existing GlobalBible JSON from previous episodes.

YOUR GOAL:
Synthesize a single, canonical Global Render Bible tracking state changes \
across all {total_episodes} episodes.

CRITICAL CONSTRAINT: PASS 1 VS PASS 2 DELEGATION
You are Pass 1. You excel at timeline math, deduplication, and spatial logic. \
Opus (Pass 2) excels at evocative, cinematic prose. \
DO NOT WRITE PROSE. Follow these strict formatting rules for string fields:

- [OPUS_ENRICHMENT]: For fields requiring evocative prose \
(`visual_description`, `atmosphere`), output exactly "[OPUS_ENRICHMENT]".
- FIELD BOUNDARY RULES (critical for downstream prompt assembly):
  * `visual_description` = FACE AND BODY ONLY: age, build, skin, hair, \
facial features, scars, physical traits. NO wardrobe, accessories, props, \
or narrative text. Items worn (respirators, counters, gloves, boots) belong \
EXCLUSIVELY in `wardrobe_description`. Literary phrases ("the compact build \
of someone who survives by...") are wasted tokens for image models — use \
concrete visual descriptors only.
  * `wardrobe_description` = CLOTHING AND ACCESSORIES ONLY: everything worn \
or carried. This is the single source of truth for what the character wears. \
Do NOT duplicate wardrobe items in `visual_description`.
  * `distinguishing_marks` = permanent body features only (scars, tattoos, \
implants). NOT clothing or accessories.
- SKELETON FORMAT: For physical description fields, output ONLY a dense, \
comma-separated list of raw nouns, colors, and physical facts. No sentences.
  * `wardrobe_description`: "heavy canvas jacket, rust stains, amber wrist \
counter on left wrist, cargo pants with reinforced knees, tool belt with \
salvage hook, steel-toed boots, repair patches visible"
  * `hair_makeup`: "grime-streaked face, sweat-matted hair pulled back, \
orange cuticles from rust"
  * `distinguishing_marks`: "burn scar left cheek (ep 12), radiation veins \
at temples (ep 9-13), vine marks on wrists (ep 21+)"
  * `description` (location): "metal grating floor, rust flakes, sweating \
pipes, emergency strip lights (amber/red), maintenance shaft access, cramped \
ceiling height"
  * `description` (prop): "heavy metal hook, worn smooth handle, \
salvage-standard issue, bent 45 degrees after ep 36"

METHODOLOGY & RULES:

1. CHAIN OF THOUGHT (`structural_analysis`):
- The very first field in the JSON schema is `structural_analysis`. You MUST \
use this as a scratchpad to write out your timeline calculations, prop \
deduplication logic, and phase boundary checks BEFORE outputting the final \
arrays.

2. GROUND TRUTH & PHASE BOUNDARIES (Hypothesis vs Proof):
- The Character Bible contains "Transformation Beats". These are the intended \
phase boundaries (the hypothesis).
- CONFIRM these beats against the screenplay (the proof). REFINE the exact \
start_ep/end_ep boundaries based on action lines.
- Trigger a phase boundary exactly when the screenplay action lines dictate a \
persistent change (wardrobe shift, major trauma, time jump).
- Extract specific physical items from the screenplay to build the skeleton.

3. CHARACTER PRESENCE (Absence is Data):
- If a character does not appear in an episode, do NOT include that episode \
number in the `episodes` array.
- Characters who die or leave the story permanently will have their arrays \
end early.

4. LOCATION CONSOLIDATION & HABITAT ZONES:
- Strip temporal markers (NIGHT, CONTINUOUS, LATER).
- First, assign the location to a core `habitat_zone` (e.g., Lower Decks, \
The Root) based on context clues.
- Consolidate locations ONLY within the same habitat zone. If "INT. CORRIDOR" \
appears in Lower Decks and Upper Decks, they are two distinct location_ids.
- Record all original screenplay scene headings in the `aliases` array.

5. PROP EXTRACTION & HANDLING:
- Any recurring physical object in 2+ episodes gets a `prop_id`.
- Objects mentioned ONLY in dialogue still count if they are physical items.
- "Signature Props" listed in the Character Bible are MANDATORY extractions, \
even if they only appear in 1 episode.
- `associated_characters` must include ANY character who physically interacts \
with or holds the prop, not just the owner.
- Track damage/changes to the prop over time in the `state_notes` field.

6. LIGHTING MOTIFS (Thematic Resonance):
- Extract lighting setups that have narrative purpose, not just ambient lights.
- Triggers: Bookends (appears significantly in premiere/finale), \
Character-Tied (changes state based on character action/emotion), or \
Narrative Trigger (reacts to specific threats/events).

7. INCREMENTAL MERGE MODE:
- If an existing GlobalBible is provided in your context, DO NOT overwrite \
existing phase start/end boundaries unless new episodes explicitly contradict.
- Append new episodes to the `episodes` arrays.
- Add new `CharacterPhase` blocks if a transformation occurs in new episodes.

Output strictly valid JSON matching the provided GlobalBible schema."""

STORYBOARD_SYSTEM_PROMPT = """\
You are the Starsend Storyboard Engine. You translate camera-tested script \
blocks into strict, render-ready JSON records.

RULES:
1. 1:1 MAPPING: You will receive an array of exactly {total_shots} text \
blocks. You MUST output exactly {total_shots} shot records. Do not merge \
or split them.
2. STRICT ID ENFORCEMENT: Use exact IDs for characters, props, and locations \
from the Global Bible provided.
3. PHASE MAPPING: This is episode {episode_num}. Map to the correct \
Character Phase based on the Bible's start_ep/end_ep logic.
4. PROMPT SKELETONS & KINETIC ACTION: Write highly descriptive, comma-separated \
keywords for the prompt_skeleton fields. No prose sentences.
SUBJECT_LINE: Write a brief character+action anchor for subject_line. The Blocking \
Pass (Stage 2.5) will overwrite this with detailed physical blocking later. Focus on \
the WHAT (action), not appearance — the bible handles appearance. \
Example: "Torch scanning corridor, looking over shoulder" (not "grime-streaked young \
woman with respirator").
CRITICAL: For kinetic_action, use specific camera-artifact language, NOT \
emotional summaries.
- Bad: "She is scared."
- Good: "micro-tremor in hands, wide-eyed shallow breath, sweat catching light"
- Bad: "He hits the wall."
- Good: "motion blur on point of contact, kinetic energy frozen mid-transfer, \
dust kicked into lens"
- Bad: "Running fast."
- Good: "motion blur on trailing limbs, forward lean, kinetic diagonal composition"
- Bad: "She is determined."
- Good: "jaw set, tendons visible in neck, center-weighted composition, static framing"
5. SHOT TYPE INFERENCE: You MUST assign a shot_type to every shot based on \
narrative context. Close emotional beats = CU/ECU. Establishing geography = \
WS/LS. Character action = MS/FS. Object focus = INSERT.
6. LIGHTING: Every shot MUST have at least one light source with a practical \
motivator (what's physically emitting light in the scene).
7. DURATION: Estimate target_editorial_duration_s based on the action density. \
Quick cuts = 2-3s. Dialogue = 3-5s. Atmospheric/establishing = 4-6s.
8. AUDIO: Extract dialogue verbatim. Note ambient SFX and foley from the \
scene description. Mark voice-over with is_voiceover=true.
9. SCENE INDEX: Copy the scene_index from the camera-tested input. Each shot \
record MUST have the same scene_index as its corresponding camera-tested shot.
10. SPATIAL STAGING (screen_direction + camera_side): This is critical for \
editorial flow. Do NOT default everything to center/A. Apply these rules:
- CAMERA SIDE: Within a dialogue scene, alternate camera_side "A" and "B" for \
shot-reverse-shot. Character who speaks first gets "A". Reaction shots get "B". \
ENV/establishing shots stay "A". If a character crosses the axis, note it.
- SCREEN DIRECTION: Match character movement and blocking:
  * Character entering from screen left → "left-to-right"
  * Character entering from screen right → "right-to-left"
  * Character stationary, angled or profiled → "center"
  * Character physically walking toward camera → "toward-camera"
  * Character walking away → "away-from-camera"
  * In dialogue: the speaker faces the listener. Frame the speaker off-center \
toward the side the listener occupies.
- TOWARD-CAMERA IS RARE: Reserve "toward-camera" for LITERAL physical movement \
toward the camera lens (a character walking down a hallway toward us). A \
character staring, reacting, or contemplating is "center" with appropriate \
camera_side, even if they face forward. "toward-camera" generates direct \
eye-to-lens contact which breaks immersion. When in doubt, use "center" or a \
lateral direction.
- 180-DEGREE RULE: Within a scene, maintain consistent camera_side unless the \
script explicitly describes a character crossing. If character A is screen-left \
in the establishing shot, keep them screen-left in all coverage.
- VISUAL FLOW BETWEEN CUTS: Consecutive shots should have complementary screen \
directions. If shot N has a character exiting right, shot N+1 should have them \
entering from left (match-on-action). Avoid two center shots in a row unless \
the scene is static.
- SOLO SCENES: Even solo characters should have spatial variation. A character \
pacing uses left-to-right then right-to-left. A character at a terminal is \
off-center. Prefer lateral directions (left-to-right, right-to-left) over \
center. Only truly static contemplative beats should be center."""


# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

class IngestPipeline:
    """Four-stage render extraction pipeline (Stages 0, 1, 1.5 and 2).

    Supports two extraction backends:
      - "opus-4.6" (default, ADR H07): Uses Claude Opus via sub-agent skills
      - "gemini-3.1-pro-preview": Uses Gemini API directly

    batch_size controls how many episodes are sent per extraction call
    (default: 5 per ADR H07).
    """

    def __init__(
        self,
        project: str,
        project_root: Path,
        min_shots: int = 28,
        max_shots: int = 41,
        dry_run: bool = False,
        extraction_model: str = DEFAULT_EXTRACTION_MODEL,
        batch_size: int = DEFAULT_BATCH_SIZE,
    ):
        self.project = project
        self.project_root = Path(project_root)
        self.min_shots = min_shots
        self.max_shots = max_shots
        self.dry_run = dry_run
        self.extraction_model = extraction_model
        self.batch_size = batch_size
        self._client = None

        # Per-project state paths (ADR-C04)
        from lib.constants import PROJECTS_ROOT
        state_root = PROJECTS_ROOT / project / "state" / "starsend"
        self.camera_tested_dir = state_root / "camera_tested"
        self.plans_dir = state_root / "plans"
        self.episodes_dir = self.plans_dir
        self.bible_path = state_root / "global_bible.json"

        # Ensure output directories exist
        self.camera_tested_dir.mkdir(parents=True, exist_ok=True)
        self.plans_dir.mkdir(parents=True, exist_ok=True)
        self.episodes_dir.mkdir(parents=True, exist_ok=True)

    @property
    def client(self):
        """Lazy-init Gemini client."""
        if self._client is None:
            from google import genai
            api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
            if not api_key:
                raise RuntimeError("GOOGLE_API_KEY or GEMINI_API_KEY not set")
            self._client = genai.Client(api_key=api_key)
        return self._client

    # ------------------------------------------------------------------
    # File I/O helpers
    # ------------------------------------------------------------------

    def _load_episode_script(self, episode_num: int) -> str:
        """Load raw screenplay text for an episode."""
        ep_path = self.project_root / f"episodes/ep_{episode_num:03d}.md"
        if not ep_path.exists():
            raise FileNotFoundError(f"Episode script not found: {ep_path}")
        raw = ep_path.read_text()
        # Strip metadata: everything before the first scene heading or action
        # Keep only narrative content (scene headings, action, dialogue)
        return self._strip_metadata(raw)

    def _strip_metadata(self, text: str) -> str:
        """Strip episode metadata headers, word counts, and non-narrative cruft.

        Keeps: scene headings, action, dialogue, beat markers (for pacing context).
        Strips: title headers, word counts, dialogue %, markdown dividers,
                [[EPISODE N]] markers.
        """
        lines = text.split("\n")
        content_lines = []
        in_content = False
        for line in lines:
            stripped = line.strip()

            # Always skip these regardless of position
            if re.match(r"^\[\[EPISODE\s+\d+", stripped):
                continue
            if stripped == "---":
                continue

            if not in_content:
                # Skip metadata lines before content starts
                if re.match(r"^#\s+Episode\s+\d+", stripped):
                    continue
                if re.match(r"^\*\*(Word Count|Dialogue|Hook|Cliffhanger)", stripped, re.IGNORECASE):
                    continue
                if re.match(r"^(title|episode|word_count|words|cliffhanger|hook):", stripped, re.IGNORECASE):
                    continue
                if not stripped:
                    continue

                # Content starts at scene headings, beat markers, or action
                if (stripped.startswith("INT.") or stripped.startswith("EXT.")
                        or re.match(r"^#\s*\[\d+:\d+", stripped)  # Beat markers
                        or len(stripped) > 10):
                    in_content = True

            if in_content:
                content_lines.append(line)

        return "\n".join(content_lines).strip()

    def _load_character_bible(self) -> str:
        """Load character bible text."""
        bible_path = self.project_root / "bible/characters.md"
        if bible_path.exists():
            return bible_path.read_text()
        return ""

    def _load_project_config(self) -> dict:
        """Load project config JSON."""
        config_path = self.project_root / "visual/project_config.json"
        if config_path.exists():
            return json.loads(config_path.read_text())
        return {}

    def _load_breakdown(self) -> dict:
        """Load existing breakdown.json for character/location data."""
        breakdown_path = self.project_root / "visual/breakdown.json"
        if breakdown_path.exists():
            return json.loads(breakdown_path.read_text())
        return {}

    def _episode_ids(self) -> list[int]:
        """Discover all episode numbers from script files."""
        episodes_dir = self.project_root / "episodes"
        if not episodes_dir.exists():
            return []
        nums = []
        for f in sorted(episodes_dir.glob("ep_*.md")):
            match = re.match(r"ep_(\d+)\.md", f.name)
            if match:
                nums.append(int(match.group(1)))
        return sorted(nums)

    def _save_json(self, path: Path, data) -> None:
        """Save a Pydantic model or dict as formatted JSON."""
        if hasattr(data, "model_dump"):
            content = data.model_dump_json(indent=2)
        else:
            content = json.dumps(data, indent=2, default=str)
        path.write_text(content)
        logger.info(f"Saved: {path}")

    def _load_camera_tested(self, episode_num: int) -> CameraTestedEpisode:
        """Load a camera-tested episode from disk."""
        path = self.camera_tested_dir / f"ep_{episode_num:03d}.json"
        if not path.exists():
            raise FileNotFoundError(f"Camera-tested file not found: {path}")
        return CameraTestedEpisode.model_validate_json(path.read_text())

    def _load_bible(self) -> GlobalBible:
        """Load the global bible from disk."""
        path = self.bible_path
        if not path.exists():
            raise FileNotFoundError(f"Global bible not found: {path}")
        return GlobalBible.model_validate_json(path.read_text())

    # ------------------------------------------------------------------
    # Gemini API call with structured output
    # ------------------------------------------------------------------

    def _call_gemini(
        self,
        model: str,
        system_prompt: str,
        user_prompt: str,
        response_schema: dict | None = None,
        temperature: float = 0.3,
        max_tokens: int = 65536,
    ) -> str:
        """Call Gemini with optional response_schema enforcement."""
        from google.genai import types as genai_types

        config = genai_types.GenerateContentConfig(
            temperature=temperature,
            max_output_tokens=max_tokens,
            system_instruction=system_prompt,
        )

        if response_schema:
            config.response_mime_type = "application/json"
            config.response_json_schema = response_schema

        char_count = len(system_prompt) + len(user_prompt)
        logger.info(f"Calling {model} ({char_count:,} chars input)")

        if self.dry_run:
            logger.info(f"[DRY RUN] Would call {model} ({char_count:,} chars)")
            return None

        t0 = time.time()
        response = self.client.models.generate_content(
            model=model,
            contents=user_prompt,
            config=config,
        )
        elapsed = time.time() - t0

        text = response.text if hasattr(response, "text") else str(response)
        logger.info(f"Response: {len(text):,} chars in {elapsed:.1f}s")
        return text

    # ------------------------------------------------------------------
    # Stage 0: Camera-Test Pass
    # ------------------------------------------------------------------

    def run_camera_test(
        self,
        episode_num: int | None = None,
        episode_nums: list[int] | None = None,
    ) -> CameraTestedEpisode | list[CameraTestedEpisode] | None:
        """Stage 0: Break screenplay into shot-boundary text blocks.

        Accepts either a single episode_num (backward-compatible) or a list
        of episode_nums for batch mode. In batch mode, processes each episode
        sequentially within the batch and returns a list of results.
        """
        # Handle batch vs single mode
        if episode_nums is not None:
            results = []
            for ep in episode_nums:
                result = self._run_camera_test_single(ep)
                if result is not None:
                    results.append(result)
            return results
        elif episode_num is not None:
            return self._run_camera_test_single(episode_num)
        else:
            raise ValueError("Must provide either episode_num or episode_nums")

    def _run_camera_test_single(self, episode_num: int) -> CameraTestedEpisode | None:
        """Camera-test a single episode."""
        logger.info(f"=== Stage 0: Camera-Test EP{episode_num:03d} ===")

        script_text = self._load_episode_script(episode_num)
        episode_id = f"EP{episode_num:03d}"

        system_prompt = CAMERA_TEST_SYSTEM_PROMPT.format(
            min_shots=self.min_shots,
            max_shots=self.max_shots,
        )

        user_prompt = f"Camera-test the following screenplay episode.\n\n{script_text}"

        model = self._resolve_model(CAMERA_TEST_MODEL)
        schema = CameraTestedEpisode.model_json_schema()

        raw_response = self._call_gemini(
            model=model,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            response_schema=schema,
        )

        if raw_response is None:
            logger.info("[DRY RUN] Camera-test skipped")
            return None

        errors = []
        for attempt in range(MAX_RETRIES + 1):
            if attempt > 0:
                raw_response = self._call_gemini(
                    model=model,
                    system_prompt=system_prompt,
                    user_prompt=self._retry_prompt(user_prompt, errors),
                    response_schema=schema,
                )

            try:
                result = CameraTestedEpisode.model_validate_json(raw_response)
            except Exception as e:
                errors = [f"Pydantic validation error: {e}"]
                logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                if attempt == MAX_RETRIES:
                    self._send_to_review_queue(episode_id, "camera_test", errors, raw_response)
                    raise
                continue

            # Validate shot budget
            budget_errors = validate_camera_test_budget(result, self.min_shots, self.max_shots)
            if budget_errors:
                errors = budget_errors
                logger.warning(f"Attempt {attempt + 1}: {errors}")
                if attempt == MAX_RETRIES:
                    logger.warning(f"Budget violation after {MAX_RETRIES + 1} attempts, saving with warning")
                    break
                continue

            break

        # Persist to disk
        out_path = self.camera_tested_dir / f"ep_{episode_num:03d}.json"
        self._save_json(out_path, result)
        logger.info(f"Camera-test complete: {result.total_shots} shots")
        return result

    # ------------------------------------------------------------------
    # Stage 1: Breakdown Pass / Global Bible
    # ------------------------------------------------------------------

    def run_breakdown_pass(
        self,
        episode_nums: list[int] | None = None,
        merge: bool = False,
    ) -> GlobalBible | None:
        """Stage 1: Synthesize Global Bible from all camera-tested episodes.

        If merge=True, incrementally merges the new bible with an existing
        global_bible.json (preserving human edits and accumulating data
        from prior batches).
        """
        if episode_nums is None:
            episode_nums = self._episode_ids()

        logger.info(f"=== Stage 1: Breakdown Pass ({len(episode_nums)} episodes) ===")

        # Gather all camera-tested texts
        episode_texts = []
        for ep_num in episode_nums:
            try:
                ct = self._load_camera_tested(ep_num)
                shots_text = "\n\n".join(
                    f"[SHOT {s.shot_index}]\n{s.source_text}" for s in ct.shots
                )
                episode_texts.append(f"[EPISODE {ep_num}]\n{shots_text}")
            except FileNotFoundError:
                logger.warning(f"No camera-tested file for EP{ep_num:03d}, skipping")

        if not episode_texts:
            raise RuntimeError("No camera-tested episodes found. Run camera-test first.")

        # Load supplementary data
        char_bible = self._load_character_bible()
        project_config = self._load_project_config()
        breakdown = self._load_breakdown()

        # Load existing bible for merge context
        existing_bible = None
        if merge:
            try:
                existing_bible = self._load_bible()
                logger.info(f"Merging with existing bible: {len(existing_bible.characters)} chars, {len(existing_bible.locations)} locs")
            except FileNotFoundError:
                logger.info("No existing bible found, creating fresh")

        model = self._resolve_model(BREAKDOWN_MODEL)

        system_prompt = BREAKDOWN_SYSTEM_PROMPT.format(
            project_name=self.project,
            total_episodes=len(episode_nums),
        )

        # Build user prompt with all context
        user_parts = ["# SCREENPLAY EPISODES (camera-tested)\n"]
        user_parts.append("\n\n---\n\n".join(episode_texts))

        if char_bible:
            user_parts.append("\n\n# CHARACTER BIBLE\n\n" + char_bible)

        if project_config:
            user_parts.append(
                "\n\n# PROJECT CONFIG\n\n" + json.dumps(project_config, indent=2)
            )

        if breakdown and "characters" in breakdown:
            char_data = {}
            for char_id, char_info in breakdown["characters"].items():
                char_data[char_id] = {
                    "display_name": char_info.get("display_name", char_id),
                    "visual_description": char_info.get("visual_description", ""),
                    "wardrobe": char_info.get("wardrobe", {}),
                }
            user_parts.append(
                "\n\n# EXISTING BREAKDOWN (wardrobe phases)\n\n"
                + json.dumps(char_data, indent=2)
            )

        if existing_bible:
            user_parts.append(
                "\n\n# EXISTING GLOBAL BIBLE (merge with this — preserve human edits)\n\n"
                + existing_bible.model_dump_json(indent=2)
            )

        user_prompt = "\n".join(user_parts)
        schema = GlobalBible.model_json_schema()

        raw_response = self._call_gemini(
            model=model,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            response_schema=schema,
            max_tokens=65536,
        )

        if raw_response is None:
            logger.info("[DRY RUN] Breakdown pass skipped")
            return None

        errors = []
        for attempt in range(MAX_RETRIES + 1):
            if attempt > 0:
                raw_response = self._call_gemini(
                    model=model,
                    system_prompt=system_prompt,
                    user_prompt=self._retry_prompt(user_prompt, errors),
                    response_schema=schema,
                    max_tokens=65536,
                )

            try:
                result = GlobalBible.model_validate_json(raw_response)
            except Exception as e:
                errors = [f"Pydantic validation error: {e}"]
                logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                if attempt == MAX_RETRIES:
                    self._send_to_review_queue("global_bible", "breakdown", errors, raw_response)
                    raise
                continue

            break

        # Persist
        out_path = self.bible_path
        self._save_json(out_path, result)
        logger.info(f"Bible complete: {len(result.characters)} chars, {len(result.locations)} locs, {len(result.props)} props")
        return result

    # ------------------------------------------------------------------
    # Stage 1.5: Validate Global Bible (post-Gemini deterministic checks)
    # ------------------------------------------------------------------

    def validate_global_bible(self, bible: GlobalBible | None = None) -> list[str]:
        """Run deterministic consistency checks on a GlobalBible.

        Checks performed, in the order their errors are reported:
        1. Phase continuity: every phase has end_ep >= start_ep.
        2. Phase ordering: each phase starts no earlier than the previous
           phase ends (a shared boundary episode is accepted).
        3. Episode lists (characters and props): sorted ascending, no duplicates.
        4. Orphaned references: characters named by props, and locations /
           characters named by lighting motifs, must exist in the bible.
        5. Hex codes: location color_palette entries must be exactly #RRGGBB.
        6. Field boundaries: a character's visual_description must not contain
           wardrobe terms or narrative phrasing.

        Args:
            bible: Bible to validate. When None it is loaded via
                ``self._load_bible()``.

        Returns:
            Human-readable error strings (empty list when clean). Each error
            is also logged as a warning.
        """
        if bible is None:
            bible = self._load_bible()

        hex_pattern = re.compile(r"#[0-9A-Fa-f]{6}")

        # NOTE(review): markers are matched as plain substrings of the
        # lower-cased description, so e.g. "vest" also fires on "harvest".
        # Kept as-is; tighten only with agreement on the intended matching.
        wardrobe_markers = (
            "respirator", "glove", "boot", "jacket", "pants", "shirt",
            "belt", "holster", "vest", "coat", "shoes", "counter",
            "wearing", "wears", "draped", "hung", "strapped",
        )
        # Literary/narrative phrasing (sentences with no visual value).
        narrative_markers = (
            "someone who", "the kind of", "as if", "suggests",
            "speaks to", "the weapon", "a world of",
        )

        errors: list[str] = []

        for char_id, char in bible.characters.items():
            # Phase continuity
            for phase in char.phases:
                if phase.end_ep < phase.start_ep:
                    errors.append(
                        f"{char_id}: phase '{phase.phase_id}' has end_ep={phase.end_ep} < start_ep={phase.start_ep}"
                    )

            # Phase overlap between consecutive phases
            for curr, nxt in zip(char.phases, char.phases[1:]):
                if nxt.start_ep < curr.end_ep:
                    errors.append(
                        f"{char_id}: phase '{nxt.phase_id}' starts at ep {nxt.start_ep} "
                        f"but previous phase '{curr.phase_id}' ends at ep {curr.end_ep}"
                    )

            # Episode monotonicity
            if char.episodes != sorted(set(char.episodes)):
                errors.append(f"{char_id}: episodes array not sorted or has duplicates")

        # Orphaned references in props
        for prop_id, prop in bible.props.items():
            for assoc_char in prop.associated_characters:
                if assoc_char not in bible.characters:
                    errors.append(
                        f"prop '{prop_id}': associated_character '{assoc_char}' not in bible.characters"
                    )
            if prop.episodes != sorted(set(prop.episodes)):
                errors.append(f"prop '{prop_id}': episodes array not sorted or has duplicates")

        # Orphaned references in lighting motifs
        for motif in bible.lighting_motifs:
            for loc_id in motif.associated_locations:
                if loc_id not in bible.locations:
                    errors.append(
                        f"motif '{motif.motif_id}': location '{loc_id}' not in bible.locations"
                    )
            for char_id in motif.associated_characters:
                if char_id not in bible.characters:
                    errors.append(
                        f"motif '{motif.motif_id}': character '{char_id}' not in bible.characters"
                    )

        # Hex code validation. fullmatch (rather than match with ^...$) so a
        # value with a trailing newline is rejected too.
        for loc_id, loc in bible.locations.items():
            for color in loc.color_palette:
                if not hex_pattern.fullmatch(color):
                    errors.append(
                        f"location '{loc_id}': invalid hex color '{color}'"
                    )

        # ── Field boundary checks: wardrobe bleed into visual_description ──
        for char_id, char in bible.characters.items():
            visual_low = (char.visual_description or "").lower()
            if not visual_low:
                continue
            hits = [m for m in wardrobe_markers if m in visual_low]
            if hits:
                errors.append(
                    f"{char_id}: visual_description contains wardrobe terms "
                    f"{hits} — these belong in wardrobe_description only"
                )
            narr_hits = [m for m in narrative_markers if m in visual_low]
            if narr_hits:
                errors.append(
                    f"{char_id}: visual_description contains narrative text "
                    f"{narr_hits} — use concrete visual descriptors only"
                )

        if errors:
            logger.warning(f"Bible validation: {len(errors)} errors found")
            for err in errors:
                logger.warning(f"  - {err}")
        else:
            logger.info("Bible validation: CLEAN (0 errors)")

        return errors

    # ------------------------------------------------------------------
    # Stage 2: Storyboard Pass
    # ------------------------------------------------------------------

    def run_storyboard_pass(self, episode_num: int, bible: GlobalBible | None = None) -> EpisodePlan | None:
        """Stage 2: Generate per-episode render plan from camera-tested + bible.

        Sends the camera-tested shots plus an episode-focused bible summary to
        the storyboard model and validates the response (schema first, then
        against the bible), retrying with error feedback up to MAX_RETRIES
        times. The accepted plan is then post-processed per shot (pipeline /
        model routing, source hashes, IP1 critic or sterility gate), merged
        with any prior plan on disk, spatially validated, and written to
        ``self.episodes_dir / "ep_NNN_plan.json"``.

        Args:
            episode_num: Episode number to plan.
            bible: Global bible to use; loaded via ``self._load_bible()``
                when None.

        Returns:
            The validated EpisodePlan parsed from the model response (the
            routed / merged dict is what gets saved), or None when
            ``_call_gemini`` returns None, which is logged as a dry run.

        Raises:
            Exception: the schema-validation error of the final attempt is
                re-raised after the raw response goes to the review queue.
                Bible-validation errors on the final attempt do NOT raise;
                the plan is saved with warnings.
        """
        episode_id = f"EP{episode_num:03d}"
        logger.info(f"=== Stage 2: Storyboard Pass {episode_id} ===")

        ct = self._load_camera_tested(episode_num)
        if bible is None:
            bible = self._load_bible()

        # Guard: warn if bible has unfilled enrichment placeholders
        if bible is not None:
            bible_json = bible.model_dump() if hasattr(bible, 'model_dump') else (
                bible.dict() if hasattr(bible, 'dict') else bible
            )
            if isinstance(bible_json, dict):
                unfilled = self._find_placeholders(bible_json)
                if unfilled:
                    paths_preview = ", ".join(p for p, _ in unfilled[:5])
                    more = f" (+{len(unfilled) - 5} more)" if len(unfilled) > 5 else ""
                    logger.warning(
                        f"Bible contains {len(unfilled)} unfilled [OPUS_ENRICHMENT] placeholders: "
                        f"{paths_preview}{more}. Plan quality may be degraded."
                    )

        model = self._resolve_model(STORYBOARD_MODEL)

        system_prompt = STORYBOARD_SYSTEM_PROMPT.format(
            total_shots=ct.total_shots,
            episode_num=episode_num,
        )

        shots_text = "\n\n".join(
            f"[SHOT {s.shot_index}] (dialogue={s.has_dialogue}, chars={s.characters_mentioned})\n{s.source_text}"
            for s in ct.shots
        )

        bible_summary = self._summarize_bible_for_episode(bible, episode_num)

        user_prompt = (
            f"# CAMERA-TESTED EPISODE {episode_id}\n\n"
            f"Total shots: {ct.total_shots}\n\n"
            f"{shots_text}\n\n"
            f"# GLOBAL BIBLE REFERENCE\n\n{bible_summary}"
        )

        schema = EpisodePlan.model_json_schema()

        raw_response = self._call_gemini(
            model=model,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            response_schema=schema,
            max_tokens=65536,
        )

        if raw_response is None:
            logger.info("[DRY RUN] Storyboard pass skipped")
            return None

        # Attempt 0 validates the response fetched above; later attempts
        # re-query the model with the previous attempt's errors appended.
        errors = []
        for attempt in range(MAX_RETRIES + 1):
            if attempt > 0:
                raw_response = self._call_gemini(
                    model=model,
                    system_prompt=system_prompt,
                    user_prompt=self._retry_prompt(user_prompt, errors),
                    response_schema=schema,
                    max_tokens=65536,
                )

            try:
                result = EpisodePlan.model_validate_json(raw_response)
            except Exception as e:
                errors = [f"Pydantic validation error: {e}"]
                logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                if attempt == MAX_RETRIES:
                    self._send_to_review_queue(episode_id, "storyboard", errors, raw_response)
                    raise
                continue

            # Validate against bible
            bible_errors = validate_plan_against_bible(result, bible)
            if bible_errors:
                errors = bible_errors
                logger.warning(f"Attempt {attempt + 1}: {len(errors)} bible validation errors")
                if attempt == MAX_RETRIES:
                    # Out of retries: keep the schema-valid plan rather than fail.
                    logger.warning(f"Bible validation errors after {MAX_RETRIES + 1} attempts, saving with warnings")
                    for e in errors[:10]:
                        logger.warning(f"  {e}")
                    break
                continue

            break

        # ── Post-extraction: routing + hashing + merge (ADR-R04, R05, R08) ──

        # Compute source_hash for stale detection
        raw_script = self._load_episode_script(episode_num)
        result_dict = result.model_dump(mode="json")
        result_dict["source_hash"] = hashlib.md5(raw_script.encode("utf-8")).hexdigest()

        # Route each shot + compile prompts
        from orchestrator.execution_plan import _determine_pipeline, _determine_model
        bible_dict = bible.model_dump(mode="json") if hasattr(bible, "model_dump") else dict(bible)

        shots = result_dict.get("shots", [])
        for shot_index, shot_dict in enumerate(shots):
            routing = shot_dict.get("routing_data", {})
            pipeline = _determine_pipeline(routing)
            shot_dict["pipeline"] = pipeline
            shot_dict["model"] = _determine_model(pipeline, routing)

            # Per-shot source text hash for merge tracking
            source_text = shot_dict.get("source_text", "")
            shot_dict["source_text_hash"] = hashlib.md5(
                source_text.encode("utf-8")
            ).hexdigest()

            # JIT: No longer bake compiled_prompts at Stage 2.
            # Prompts are compiled at generation time from live bible data.
            # Run IP1 Plan Pass Critic (subsumes sterility gate + adds 4 more dimensions).
            try:
                from lib.constants import get_config
                _cfg = get_config()
                if _cfg.get("critic_flags", {}).get("ip1_plan_pass", False):
                    from lib.critics.plan_pass_critic import PlanPassCritic
                    _pool_dir = self.episodes_dir.parent  # projects/{project}/state/visual
                    _critic = PlanPassCritic(
                        bible=bible_dict,
                        all_shots=shots,
                        max_attempts=2,
                        experience_pool_dir=_pool_dir,
                        shot_id=shot_dict.get("shot_id", ""),
                    )
                    shot_dict, _cr = _critic.run(shot_dict)
                    if not _cr.passed:
                        logger.warning(
                            f"IP1 critic: {shot_dict.get('shot_id', '?')} — "
                            f"{[d.name for d in _cr.hard_failures]}"
                        )
                    elif _cr.soft_failures:
                        logger.info(
                            f"IP1 critic advisory: {shot_dict.get('shot_id', '?')} — "
                            f"{[d.name for d in _cr.soft_failures]}"
                        )
                else:
                    # Fallback: original sterility gate when critic flag is off
                    from lib.jit_prompt import validate_sterility, auto_fix_tokens
                    shot_dict = auto_fix_tokens(shot_dict, bible_dict)
                    violations = validate_sterility(shot_dict, bible_dict)
                    if violations:
                        logger.warning(
                            f"Sterility gate: {shot_dict.get('shot_id', '?')} — "
                            f"{'; '.join(violations)}"
                        )
            except Exception as e:
                # Graceful degradation: if critic fails, try original sterility gate.
                # Log the cause first so the failure is not silently swallowed.
                logger.warning(
                    f"Plan critic step failed for shot index {shot_index}: {e}; "
                    f"falling back to sterility gate"
                )
                try:
                    from lib.jit_prompt import validate_sterility, auto_fix_tokens
                    shot_dict = auto_fix_tokens(shot_dict, bible_dict)
                    violations = validate_sterility(shot_dict, bible_dict)
                    if violations:
                        logger.warning(
                            f"Sterility gate (fallback): {shot_dict.get('shot_id', '?')} — "
                            f"{'; '.join(violations)}"
                        )
                except Exception as e2:
                    logger.warning(f"Sterility gate failed for {shot_dict.get('shot_id', '?')}: {e2}")

            # The critic / auto-fix helpers hand back the shot they worked on.
            # Store it in the list so their repairs are kept even if a helper
            # returned a new dict instead of mutating the one it was given
            # (a no-op when the same object comes back).
            if isinstance(shot_dict, dict):
                shots[shot_index] = shot_dict

        # Merge with prior plan if it exists (preserve human edits)
        out_path = self.episodes_dir / f"ep_{episode_num:03d}_plan.json"
        if out_path.exists():
            try:
                old_plan = json.loads(out_path.read_text(encoding="utf-8"))
                result_dict = merge_with_prior_plan(old_plan, result_dict)
                logger.info("Merged with prior plan (preserved human overrides)")
            except Exception as e:
                logger.warning(f"Merge with prior plan failed: {e}")

        # Spatial validation — auto-correct contradictions before saving
        try:
            from orchestrator.spatial_validation import validate_spatial_data
            sv_result = validate_spatial_data(result_dict, auto_correct=True)
            if sv_result["corrections"]:
                for c in sv_result["corrections"]:
                    logger.info(
                        "  [SPATIAL FIX] %s: %s → %s (%s)",
                        c["shot_id"], c["old"], c["new"], c["reason"],
                    )
            if sv_result["warnings"]:
                for w in sv_result["warnings"]:
                    logger.warning("  [SPATIAL WARN] %s: %s", w["shot_id"], w["message"])
        except Exception as e:
            logger.warning(f"Spatial validation failed (non-fatal): {e}")

        # Persist
        self._save_json(out_path, result_dict)
        logger.info(f"Plan complete: {result.total_shots} shots")
        return result

    def _load_project_config(self) -> dict:
        """Load project_config.json from the Recoil project directory."""
        config_path = self.project_root / "visual" / "project_config.json"
        if config_path.exists():
            return json.loads(config_path.read_text(encoding="utf-8"))
        return {}

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _resolve_model(self, gemini_model: str) -> str:
        """Return the configured extraction model.

        If extraction_model is set to a Gemini variant, use that directly.
        If it's 'opus-4.6', the caller should dispatch via sub-agent skill
        instead. For Gemini API calls, this returns the gemini_model fallback.
        """
        if self.extraction_model.startswith("gemini"):
            return self.extraction_model
        # For opus, fall back to the Gemini model for API calls
        # (sub-agent dispatch happens at the orchestrator level)
        return gemini_model

    def _summarize_bible_for_episode(self, bible: GlobalBible, episode_num: int) -> str:
        """Create a focused bible summary for a specific episode."""
        parts = []

        # Characters active in this episode
        parts.append("## Characters\n")
        for char_id, char in bible.characters.items():
            if episode_num in char.episodes or not char.episodes:
                phase = char.phase_for_episode(episode_num)
                phase_desc = f" [{phase.phase_id}]: {phase.wardrobe_description}" if phase else ""
                parts.append(f"- **{char_id}** ({char.display_name}): {char.visual_description}{phase_desc}")

        # Locations
        parts.append("\n## Locations\n")
        for loc_id, loc in bible.locations.items():
            parts.append(f"- **{loc_id}**: {loc.description}")

        # Props
        parts.append("\n## Props\n")
        for prop_id, prop in bible.props.items():
            parts.append(f"- **{prop_id}**: {prop.description}")

        return "\n".join(parts)

    def _retry_prompt(self, original_prompt: str, errors: list[str]) -> str:
        """Build a retry prompt that includes the original + error feedback."""
        error_text = "\n".join(f"- {e}" for e in errors[:20])
        return (
            f"{original_prompt}\n\n"
            f"---\n\n"
            f"YOUR PREVIOUS OUTPUT HAD ERRORS. Fix these issues:\n\n"
            f"{error_text}\n\n"
            f"Output valid JSON that passes all validation checks."
        )

    def _send_to_review_queue(
        self, entity_id: str, stage: str, errors: list[str], raw_response: str
    ) -> None:
        """Append a failed extraction to ``DATA_DIR / "review_queue.json"``.

        The queue file is a JSON list; it is read in full, extended by one
        entry, and rewritten.

        Args:
            entity_id: Identifier of the entity that failed (e.g. an episode id).
            stage: Name of the pipeline stage that produced the failure.
            errors: Validation error messages to record.
            raw_response: Model output that failed; only the first 5000
                characters are stored.
        """
        queue_path = DATA_DIR / "review_queue.json"
        queue = []
        if queue_path.exists():
            queue = json.loads(queue_path.read_text(encoding="utf-8"))

        queue.append({
            "entity_id": entity_id,
            "stage": stage,
            "errors": errors,
            "raw_response": raw_response[:5000],  # Truncate
            "timestamp": datetime.now().isoformat(),
        })

        # This runs while a validation failure is being handled; a missing
        # data directory must not replace that failure with a write error.
        queue_path.parent.mkdir(parents=True, exist_ok=True)
        queue_path.write_text(json.dumps(queue, indent=2), encoding="utf-8")
        logger.error(f"Sent to review queue: {entity_id} ({stage}) — {len(errors)} errors")

    # ------------------------------------------------------------------
    # Stage 2.5: Blocking Pass
    # ------------------------------------------------------------------

    def run_blocking_pass(
        self,
        episode_num: int,
        scene_indices: list[int] | None = None,
    ) -> dict | None:
        """Stage 2.5: Run the blocking pass for one episode.

        Thin wrapper: builds an ``orchestrator.blocking_pass.BlockingPass``
        with this pipeline's project, project root and dry-run flag, and
        returns whatever its ``run()`` returns.

        Intended to run AFTER run_storyboard_pass(); per the original notes
        the blocking pass works per scene on the episode's existing plan JSON
        (that logic lives in BlockingPass and is not visible here).

        Args:
            episode_num: Episode number.
            scene_indices: Optional list of specific scenes to (re-)process.

        Returns:
            Dict with stats, or None on dry run (as reported by BlockingPass).
        """
        from orchestrator.blocking_pass import BlockingPass

        ep_label = f"EP{episode_num:03d}"
        logger.info(f"=== Stage 2.5: Blocking Pass {ep_label} ===")

        blocking = BlockingPass(
            project=self.project,
            project_root=self.project_root,
            dry_run=self.dry_run,
        )
        outcome = blocking.run(episode_num=episode_num, scene_indices=scene_indices)
        return outcome

    # ------------------------------------------------------------------
    # Full pipeline
    # ------------------------------------------------------------------

    def run_full(self, episode_nums: list[int] | None = None) -> dict:
        """Run the pipeline end to end for the given episodes.

        Order: Stage 0 camera-test (episodes walked in slices of
        ``self.batch_size``), Stage 1 breakdown / global bible, Stage 1.5
        Opus enrichment, Stage 2 storyboard plans. Per-episode failures are
        logged and skipped; a breakdown failure (or a None bible) ends the
        run early. In dry-run mode the method returns right after Stage 0.
        Storyboard plans are only attempted for episodes whose camera-test
        succeeded during this call.

        Args:
            episode_nums: Episodes to process; defaults to
                ``self._episode_ids()``.

        Returns:
            Dict with keys "camera_tested" and "plans" (both keyed by
            episode number), "bible", and "enrichment".
        """
        if episode_nums is None:
            episode_nums = self._episode_ids()

        camera_tested: dict = {}
        plans: dict = {}
        results = {
            "camera_tested": camera_tested,
            "bible": None,
            "enrichment": None,
            "plans": plans,
        }

        # Stage 0: Camera-test in batches
        logger.info(f"Starting full pipeline for {len(episode_nums)} episodes (batch_size={self.batch_size})")
        for start in range(0, len(episode_nums), self.batch_size):
            batch = episode_nums[start:start + self.batch_size]
            logger.info(f"Camera-test batch: EP{batch[0]:03d}-EP{batch[-1]:03d}")
            for ep_num in batch:
                try:
                    tested = self.run_camera_test(episode_num=ep_num)
                except Exception as e:
                    logger.error(f"Camera-test failed for EP{ep_num:03d}: {e}")
                    continue
                if tested is not None:
                    camera_tested[ep_num] = tested

        if self.dry_run:
            logger.info("[DRY RUN] Full pipeline dry-run complete")
            return results

        # Stage 1: Global bible
        try:
            bible = self.run_breakdown_pass(episode_nums)
        except Exception as e:
            logger.error(f"Breakdown pass failed: {e}")
            return results
        results["bible"] = bible

        if bible is None:
            return results

        # Stage 1.5: Opus Enrichment (fills [OPUS_ENRICHMENT] placeholders)
        skip_enrichment = getattr(self, '_skip_enrichment', False)
        if skip_enrichment:
            logger.info("Opus enrichment skipped (--skip-enrichment flag)")
            results["enrichment"] = {"skipped": True, "reason": "flag"}
        else:
            try:
                enrichment_result = self.run_opus_enrichment()
                results["enrichment"] = enrichment_result
                # Enrichment rewrites the bible on disk; reload to pick it up.
                if enrichment_result.get("filled", 0) > 0:
                    bible = self._load_bible()
            except Exception as e:
                logger.warning(
                    f"Opus enrichment skipped: {e}. "
                    f"Bible may contain [OPUS_ENRICHMENT] placeholders."
                )
                results["enrichment"] = {"skipped": True, "reason": str(e)}

        # Stage 2: Per-episode plans
        for ep_num in episode_nums:
            if ep_num not in camera_tested:
                continue
            try:
                plan = self.run_storyboard_pass(ep_num, bible)
            except Exception as e:
                logger.error(f"Storyboard pass failed for EP{ep_num:03d}: {e}")
                continue
            if plan is not None:
                plans[ep_num] = plan

        logger.info(
            f"Pipeline complete: {len(results['camera_tested'])} camera-tested, "
            f"{'1 bible' if results['bible'] else 'no bible'}, "
            f"{len(results['plans'])} plans"
        )
        return results

    # ------------------------------------------------------------------
    # Stage 1.5: Opus Enrichment
    # ------------------------------------------------------------------

    @staticmethod
    def _find_placeholders(obj, path=""):
        """Recursively find all [OPUS_ENRICHMENT] placeholders in a dict.

        Returns list of (dotpath, field_type) tuples.
        E.g., [("characters.SADIE.visual_description", "visual_description"),
               ("locations.int_bar.atmosphere", "atmosphere")]
        """
        results = []
        if isinstance(obj, str) and "[OPUS_ENRICHMENT]" in obj:
            # Derive field_type from the last segment, stripping list indices
            raw_field = path.rsplit(".", 1)[-1] if "." in path else path
            field_type = re.sub(r'\[\d+\]$', '', raw_field)
            results.append((path, field_type))
        elif isinstance(obj, dict):
            for k, v in obj.items():
                child_path = f"{path}.{k}" if path else k
                results.extend(IngestPipeline._find_placeholders(v, child_path))
        elif isinstance(obj, list):
            for i, v in enumerate(obj):
                results.extend(IngestPipeline._find_placeholders(v, f"{path}[{i}]"))
        return results

    @staticmethod
    def _set_by_dotpath(obj, dotpath, value):
        """Set a value in a nested dict using dot notation.

        Handles both dict keys and list indices (e.g., "phases[0].wardrobe").
        """
        parts = re.split(r'\.(?![^\[]*\])', dotpath)
        current = obj
        for i, part in enumerate(parts[:-1]):
            # Handle list index
            m = re.match(r'(.+)\[(\d+)\]$', part)
            if m:
                current = current[m.group(1)][int(m.group(2))]
            else:
                current = current[part]
        # Set the final value
        last = parts[-1]
        m = re.match(r'(.+)\[(\d+)\]$', last)
        if m:
            current[m.group(1)][int(m.group(2))] = value
        else:
            current[last] = value

    @staticmethod
    def _extract_character_dna(characters_md: str, character_name: str) -> str:
        """Extract the section for a specific character from characters.md.

        Handles both prose sections (## SADIE) and markdown tables.
        Splits on any heading level (##, ###) and matches case-insensitively.
        Falls back to table row extraction if headers don't match.
        Returns the full section text, or empty string if not found.
        """
        # 1. Try header splitting (handles ## SADIE, ### SADIE, etc.)
        sections = re.split(r'^#+\s+', characters_md, flags=re.MULTILINE)
        for section in sections:
            first_line = section.strip().split('\n')[0] if section.strip() else ""
            # Match first word exactly to avoid substring false positives (e.g., "LIA" matching "AMELIA")
            first_word = first_line.split()[0].upper() if first_line.strip() else ""
            if character_name.upper() == first_word:
                return section.strip()

        # 2. Fallback: table row extraction (| SADIE | ... |)
        lines = characters_md.split('\n')
        for i, line in enumerate(lines):
            if line.strip().startswith('|') and re.search(rf'\b{re.escape(character_name)}\b', line, re.IGNORECASE):
                header_lines = []
                for j in range(i - 1, -1, -1):
                    if not lines[j].strip().startswith('|'):
                        break
                    header_lines.insert(0, lines[j])
                if len(header_lines) >= 2 and '-' in header_lines[1]:
                    return f"{header_lines[0]}\n{header_lines[1]}\n{line}"
                return line

        return ""

    @staticmethod
    def _extract_world_section(characters_md: str) -> str:
        """Extract 'The World' or 'Visual Grammar' sections from characters.md.

        These sections are relevant for location atmosphere enrichment.
        """
        sections = re.split(r'^## ', characters_md, flags=re.MULTILINE)
        relevant = []
        for section in sections:
            first_line = section.strip().split('\n')[0].lower() if section.strip() else ""
            if any(kw in first_line for kw in ("world", "visual grammar", "influences", "bleed")):
                relevant.append("## " + section.strip())
        return "\n\n".join(relevant)

    def _call_opus(self, system_prompt: str, user_prompt: str) -> dict:
        """Send one enrichment request to Anthropic and report text plus cost.

        Issues a single-message request to OPUS_ENRICHMENT_MODEL capped at
        1024 output tokens and reads the text of the first content block.
        Cost is derived from the reported token usage and the per-million
        rates OPUS_INPUT_COST_PER_M / OPUS_OUTPUT_COST_PER_M.

        Returns:
            {"text": stripped reply, "cost": cost rounded to 6 decimals,
             "input_tokens": int, "output_tokens": int}

        API errors are not caught here and propagate to the caller.
        """
        import anthropic

        reply = anthropic.Anthropic().messages.create(
            model=OPUS_ENRICHMENT_MODEL,
            max_tokens=1024,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
        )

        reply_text = reply.content[0].text
        tokens_in = reply.usage.input_tokens
        tokens_out = reply.usage.output_tokens

        input_cost = tokens_in * OPUS_INPUT_COST_PER_M / 1_000_000
        output_cost = tokens_out * OPUS_OUTPUT_COST_PER_M / 1_000_000

        return {
            "text": reply_text.strip(),
            "cost": round(input_cost + output_cost, 6),
            "input_tokens": tokens_in,
            "output_tokens": tokens_out,
        }

    def run_opus_enrichment(self, bible_path: Path | None = None) -> dict:
        """Stage 1.5: Fill [OPUS_ENRICHMENT] placeholders in the global bible via Opus.

        Groups placeholders by entity (character or location) for coherent
        per-entity enrichment. Each entity gets one Opus call with all its
        placeholders, the relevant character DNA, and bible structural data.

        Args:
            bible_path: Path to global_bible.json. Defaults to self.bible_path.

        Returns:
            dict with keys: filled (int), skipped (int), failed (list),
            total_cost (float), details (list of per-call results)
        """
        if bible_path is None:
            bible_path = self.bible_path

        if not bible_path.exists():
            raise FileNotFoundError(f"Bible not found: {bible_path}")

        if not os.environ.get("ANTHROPIC_API_KEY"):
            logger.warning("ANTHROPIC_API_KEY not set — skipping Opus enrichment")
            return {"filled": 0, "skipped": 0, "failed": [], "total_cost": 0.0, "details": [], "reason": "no_api_key"}

        bible = json.loads(bible_path.read_text(encoding="utf-8"))
        placeholders = self._find_placeholders(bible)

        if not placeholders:
            logger.info("No [OPUS_ENRICHMENT] placeholders found — bible already enriched")
            return {"filled": 0, "skipped": 0, "failed": [], "total_cost": 0.0, "details": []}

        logger.info(f"=== Stage 1.5: Opus Enrichment ({len(placeholders)} placeholders) ===")

        # Load character DNA if available
        dna_path = self.project_root / "bible" / "characters.md"
        characters_md = dna_path.read_text(encoding="utf-8") if dna_path.exists() else ""
        if not characters_md:
            logger.warning("No bible/characters.md found — enrichment will use structural data only")

        # Group placeholders by entity
        # e.g., {"characters.SADIE": [("characters.SADIE.visual_description", "visual_description"), ...]}
        entity_groups: dict[str, list[tuple[str, str]]] = {}
        for dotpath, field_type in placeholders:
            parts = dotpath.split(".")
            if len(parts) >= 2:
                entity_key = f"{parts[0]}.{parts[1]}"  # e.g., "characters.SADIE"
            else:
                entity_key = "_project"  # top-level fields like wardrobe_philosophy
            entity_groups.setdefault(entity_key, []).append((dotpath, field_type))

        results = {"filled": 0, "skipped": 0, "failed": [], "total_cost": 0.0, "details": []}

        for entity_key, fields in entity_groups.items():
            entity_type = entity_key.split(".")[0] if "." in entity_key else "project"
            entity_name = entity_key.split(".")[1] if "." in entity_key else entity_key

            logger.info(f"Enriching {entity_key} ({len(fields)} fields)")

            # Build context for this entity
            if entity_type == "characters":
                # Character: extract DNA section + bible structural data
                dna_section = self._extract_character_dna(characters_md, entity_name)
                if not dna_section and characters_md:
                    dna_section = characters_md  # Fallback to full file
                bible_entry = bible.get("characters", {}).get(entity_name, {})
                context_block = f"CHARACTER: {entity_name}\n\n"
                if dna_section:
                    context_block += f"--- CHARACTER DNA (creative intent) ---\n{dna_section}\n\n"
                context_block += f"--- BIBLE STRUCTURAL DATA ---\n{json.dumps(bible_entry, indent=2)}\n"

            elif entity_type == "locations":
                # Location: bible structural data + world section from DNA
                bible_entry = bible.get("locations", {}).get(entity_name, {})
                world_section = self._extract_world_section(characters_md)
                context_block = f"LOCATION: {entity_name}\n\n"
                if world_section:
                    context_block += f"--- WORLD CONTEXT ---\n{world_section}\n\n"
                context_block += f"--- BIBLE STRUCTURAL DATA ---\n{json.dumps(bible_entry, indent=2)}\n"

            else:
                # Project-level (wardrobe_philosophy)
                context_block = f"PROJECT: {self.project}\n\n"
                if characters_md:
                    world_section = self._extract_world_section(characters_md)
                    if world_section:
                        context_block += f"--- WORLD CONTEXT ---\n{world_section}\n\n"
                # Include character roster summary
                char_names = list(bible.get("characters", {}).keys())
                context_block += f"--- CHARACTERS ---\n{', '.join(char_names)}\n"

            # Build per-field prompts within this entity
            field_prompts = []
            for dotpath, field_type in fields:
                fmt = FORMAT_INSTRUCTIONS.get(field_type, FORMAT_INSTRUCTIONS.get("atmosphere"))
                field_prompts.append(f"### {dotpath}\nFormat: {fmt}\n")

            system_prompt = (
                "You are a creative director filling in visual production descriptions for a "
                "vertical microdrama series. Each field has specific format requirements. "
                "Output ONLY the requested content for each field, clearly labeled. "
                "Do NOT add commentary, explanations, or preamble.\n\n"
                "For EACH field below, output:\n"
                "FIELD: {dotpath}\n"
                "VALUE: {your content}\n\n"
                "Separate fields with a blank line."
            )

            user_prompt = f"{context_block}\n\n--- FIELDS TO FILL ---\n\n" + "\n".join(field_prompts)

            try:
                result = self._call_opus(system_prompt, user_prompt)
                logger.info(
                    f"  Opus response: {result['input_tokens']} in / {result['output_tokens']} out, "
                    f"${result['cost']:.4f}"
                )

                # Parse response — extract FIELD:/VALUE: pairs
                response_text = result["text"]
                filled_count = 0
                for dotpath, field_type in fields:
                    # Look for FIELD: {dotpath}\nVALUE: {content} (tolerates markdown bolding)
                    pattern = rf"(?i)\**FIELD:\**\s*{re.escape(dotpath)}\s*\n\**VALUE:\**\s*(.*?)(?=\n\**FIELD:|\Z)"
                    match = re.search(pattern, response_text, re.DOTALL)
                    if match:
                        value = match.group(1).strip().strip('`').strip()
                        if value and "[OPUS_ENRICHMENT]" not in value:
                            self._set_by_dotpath(bible, dotpath, value)
                            filled_count += 1
                            logger.info(f"  Filled: {dotpath} ({len(value)} chars)")
                        else:
                            logger.warning(f"  Empty or placeholder value for {dotpath}")
                            results["failed"].append(dotpath)
                    else:
                        logger.warning(f"  Could not parse response for {dotpath}")
                        results["failed"].append(dotpath)

                results["filled"] += filled_count
                results["total_cost"] += result["cost"]
                results["details"].append({
                    "entity": entity_key,
                    "fields_requested": len(fields),
                    "fields_filled": filled_count,
                    "cost": result["cost"],
                })

            except Exception as e:
                logger.error(f"  Opus call failed for {entity_key}: {e}")
                for dotpath, _ in fields:
                    # Mark failed placeholders so they're visible
                    self._set_by_dotpath(bible, dotpath, f"[OPUS_ENRICHMENT_FAILED: {e}]")
                    results["failed"].append(dotpath)

        # Atomic write — tempfile + os.replace
        import tempfile
        fd, tmp = tempfile.mkstemp(dir=str(bible_path.parent), suffix=".json")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(bible, f, indent=2, ensure_ascii=False)
            os.replace(tmp, str(bible_path))
            logger.info(f"Enriched bible written: {bible_path}")
        except Exception:
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

        logger.info(
            f"Enrichment complete: {results['filled']} filled, "
            f"{len(results['failed'])} failed, ${results['total_cost']:.4f}"
        )
        return results


# ---------------------------------------------------------------------------
# Plan Merge (ADR-R08)
# ---------------------------------------------------------------------------


def merge_with_prior_plan(old_plan: dict, new_plan: dict) -> dict:
    """Carry human edits from a previous plan over to a regenerated one.

    Shots are paired by their ``source_text_hash`` rather than by shot id,
    so renumbering or reordering shots between runs does not discard
    manual work. Shots with a missing or empty hash never pair up.

    For every new shot whose hash also exists in the old plan, these keys
    are copied from the old shot (only when present there):
      - manual_prompt_override
      - human_approvals
      - review_status
      - selected_take_id
    and the shot gets ``needs_review = False``. Every other new shot gets
    ``needs_review = True``.

    ``new_plan`` and its shot dicts are modified in place; copied values
    are shared by reference with the old plan.

    Args:
        old_plan: Previously saved plan dict.
        new_plan: Freshly generated plan dict.

    Returns:
        The same ``new_plan`` object, with its ``shots`` list rebuilt.
    """
    carried_keys = (
        "manual_prompt_override",
        "human_approvals",
        "review_status",
        "selected_take_id",
    )

    # Index prior shots by hash; when a hash repeats, the last shot wins.
    prior_index = {
        prior["source_text_hash"]: prior
        for prior in old_plan.get("shots", [])
        if prior.get("source_text_hash", "")
    }

    result_shots = []
    for candidate in new_plan.get("shots", []):
        digest = candidate.get("source_text_hash", "")
        prior = prior_index.get(digest) if digest else None

        if not prior:
            # No counterpart in the old plan — flag for Dailies review
            candidate["needs_review"] = True
        else:
            # Same source text as before — restore the human-made edits
            candidate.update(
                {key: prior[key] for key in carried_keys if key in prior}
            )
            candidate["needs_review"] = False

        result_shots.append(candidate)

    new_plan["shots"] = result_shots

    # Report how many shots still need a human look
    flagged = len([s for s in result_shots if s.get("needs_review")])
    if flagged:
        logger.info(f"Merge: {flagged}/{len(result_shots)} shots flagged as new/changed")

    return new_plan


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def main():
    """Command-line entry point for the render extraction pipeline.

    Configures INFO-level logging, parses the global options and one
    subcommand, builds an ``IngestPipeline`` and runs the requested stage(s):

      - ``camera-test EP [EP ...]``  — Stage 0 on each listed episode
      - ``breakdown``                — Stage 1 (Global Bible)
      - ``enrichment``               — Stage 1.5 (Opus Enrichment on bible)
      - ``storyboard EP [EP ...]``   — Stage 2 on each listed episode
      - ``full [--episodes ...] [--skip-enrichment]`` — all stages

    When no subcommand is given, the help text is printed.

    If ``--project-root`` is omitted, the root is derived from ``--project``
    (``~/Dropbox/CLAUDE_PROJECTS/projects/<project>``) so that selecting a
    different project never silently operates on another project's data.
    """
    import argparse

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    # Directory that holds one sub-directory per project.
    projects_dir = Path.home() / "Dropbox/CLAUDE_PROJECTS/projects"

    parser = argparse.ArgumentParser(description="Starsend Render Extraction Pipeline")
    parser.add_argument("--project", default="leviathan", help="Project name")
    parser.add_argument(
        "--project-root",
        # Resolved after parsing so the default can follow --project.
        default=None,
        help=(
            "Path to project data (Recoil project directory); "
            "default: ~/Dropbox/CLAUDE_PROJECTS/projects/<project>"
        ),
    )
    parser.add_argument("--dry-run", action="store_true", help="Don't call APIs")
    parser.add_argument(
        "--extraction-model",
        default=DEFAULT_EXTRACTION_MODEL,
        help=f"Extraction model (default: {DEFAULT_EXTRACTION_MODEL})",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=DEFAULT_BATCH_SIZE,
        help=f"Episodes per batch (default: {DEFAULT_BATCH_SIZE})",
    )

    sub = parser.add_subparsers(dest="command")

    # camera-test
    ct_parser = sub.add_parser("camera-test", help="Run Stage 0 on episode(s)")
    ct_parser.add_argument("episodes", nargs="+", type=int, help="Episode numbers")

    # breakdown
    sub.add_parser("breakdown", help="Run Stage 1 (Global Bible)")

    # enrichment
    sub.add_parser("enrichment", help="Run Stage 1.5 (Opus Enrichment on bible)")

    # storyboard
    sb_parser = sub.add_parser("storyboard", help="Run Stage 2 on episode(s)")
    sb_parser.add_argument("episodes", nargs="+", type=int, help="Episode numbers")

    # full
    full_parser = sub.add_parser("full", help="Run all stages")
    full_parser.add_argument("--episodes", nargs="*", type=int, help="Episode numbers (default: all)")
    full_parser.add_argument("--skip-enrichment", action="store_true",
                             help="Skip Opus enrichment (Stage 1.5) in full pipeline")

    args = parser.parse_args()

    # An explicit --project-root always wins; otherwise follow --project.
    if args.project_root is not None:
        project_root = Path(args.project_root)
    else:
        project_root = projects_dir / args.project

    pipeline = IngestPipeline(
        project=args.project,
        project_root=project_root,
        dry_run=args.dry_run,
        extraction_model=args.extraction_model,
        batch_size=args.batch_size,
    )

    if args.command == "camera-test":
        for ep in args.episodes:
            pipeline.run_camera_test(episode_num=ep)

    elif args.command == "breakdown":
        pipeline.run_breakdown_pass()

    elif args.command == "enrichment":
        pipeline.run_opus_enrichment()

    elif args.command == "storyboard":
        # Load the bible once and reuse it for every requested episode.
        bible = pipeline._load_bible()
        for ep in args.episodes:
            pipeline.run_storyboard_pass(ep, bible)

    elif args.command == "full":
        if getattr(args, 'skip_enrichment', False):
            # Temporarily disable enrichment in run_full
            pipeline._skip_enrichment = True
        pipeline.run_full(args.episodes)

    else:
        # No subcommand supplied.
        parser.print_help()

if __name__ == "__main__":
    # Script execution: put the project root (the directory two levels above
    # this file, already computed at import time) at the front of sys.path
    # before handing control to the CLI.
    sys.path.insert(0, str(_PROJECT_ROOT))
    main()
