"""
ingest_pipeline.py — Four-stage render extraction pipeline.

Extracts all visual production data from completed screenplays.

Stage 0: Camera-Test Pass (Gemini 2.5 Flash)
  - Breaks raw screenplay into shot-boundary text blocks
  - Enforces 28-41 shot budget per episode
  - Output: projects/{project}/state/visual/camera_tested/ep_NNN.json

Stage 1: Breakdown Pass / Global Bible (Opus-OAuth)
  - Synthesizes characters with phases, locations, props, lighting motifs
  - Input: All camera-tested JSONs + character bible + project config
  - Output: projects/{project}/state/visual/global_bible.json

Stage 1.5: Opus Enrichment (Claude Opus 4.6)
  - Fills [OPUS_ENRICHMENT] placeholders in the global bible
  - Character visual descriptions, location atmospheres, wardrobe direction
  - Groups fields by entity for coherent per-entity enrichment
  - Uses Opus-OAuth enrichment; no paid Anthropic API key required
  - Output: projects/{project}/state/visual/global_bible.json (enriched in-place)

Stage 2: Storyboard Pass (Opus-OAuth)
  - 1:1 mapping of camera-tested text blocks → render-ready shot records
  - Input: 1 camera-tested JSON + global bible
  - Output: projects/{project}/state/visual/plans/ep_NNN_plan.json

Usage:
    from orchestrator.ingest_pipeline import IngestPipeline

    pipeline = IngestPipeline(project="leviathan", project_root=Path("../projects/leviathan"))
    pipeline.run_camera_test(episode_num=1)
    pipeline.run_breakdown_pass()
    pipeline.run_opus_enrichment()           # Stage 1.5 standalone
    pipeline.run_storyboard_pass(episode_num=1)
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
import re
import sys
import tempfile
import time
from datetime import datetime, timezone
from pathlib import Path

# Ensure project root is on path for imports
_PROJECT_ROOT = Path(__file__).parent.parent
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))

from recoil.core.model_profiles import get_model
from recoil.pipeline._lib.opus_oauth import OpusOAuthError, call_opus_oauth
from recoil.pipeline._lib.render_schema import (
    AssetData,
    AudioData,
    CameraTestedEpisode,
    CreativeEpisodeOutput,
    EpisodePlan,
    GlobalBible,
    LightIntensity,
    Lighting,
    LightSource,
    PromptData,
    PropInteraction,
    RoutingData,
    ScreenPosition,
    ShotCharacter,
    ShotRecord,
    SpatialData,
    TimeOfDay,
    validate_camera_test_budget,
    validate_plan_against_bible,
    Visibility,
    VisualMode,
)
from recoil.pipeline._lib import derivation_manifest, plan_overrides
from recoil.pipeline._lib.episode_script import (
    episode_script_text_sha,
    load_episode_script_text,
    strip_metadata,
)
from recoil.pipeline._lib.derivation_sha import content_sha, plan_structural_sha

# Module-level logger; every stage in this file logs through it.
logger = logging.getLogger(__name__)

# Two directory levels above this file (same expression as _PROJECT_ROOT above).
PIPELINE_ROOT = Path(__file__).parent.parent

# Legacy data dirs (kept for reference; pipeline now uses per-project paths)
DATA_DIR = PIPELINE_ROOT / "data"

# Default extraction model — Opus 4.6 per ADR H07.
# Gemini reserved as configurable fallback.
# Used as the default for IngestPipeline(extraction_model=...).
DEFAULT_EXTRACTION_MODEL = get_model("extraction", "text")

# Per-stage model roles, resolved once at import time via get_model().
# Historically these were the Gemini fallbacks (used when extraction_model is a
# Gemini variant); per the notes below, the bible and skeleton roles have since
# moved Gemini→Opus (Opus-OAuth).
CAMERA_TEST_MODEL = get_model("camera_test", "text")
BREAKDOWN_MODEL = get_model("bible_author", "text")  # Opus-OAuth bible; Gemini→Opus.
# Opus-OAuth skeleton; Gemini→Opus.
STORYBOARD_MODEL = get_model("skeleton_author", "text")

# Number of RETRIES after the first attempt: retry loops run MAX_RETRIES + 1 times.
MAX_RETRIES = 2
# Default for IngestPipeline(batch_size=...): episodes per extraction call.
DEFAULT_BATCH_SIZE = 5


class LocationUnresolvedError(ValueError):
    """Raised when a shot's scene heading matches no location in the bible.

    The pipeline will not silently fall back to a default location. The
    offending shot index and heading hint ride along on the exception for the
    manifest health flag (REC-165 / D4).

    Attributes:
        shot_index: Index of the shot that failed to resolve, if known.
        hint: The location hint / scene heading that failed to resolve, if known.
    """

    def __init__(self, message: str, *, shot_index: int | None = None, hint: str | None = None):
        # Only the message is forwarded to ValueError; the context stays on self.
        self.hint = hint
        self.shot_index = shot_index
        ValueError.__init__(self, message)


# Matches a leading interior/exterior marker of a scene heading — INT, EXT,
# INT/EXT, EXT/INT or I/E, with optional dots — plus any whitespace, '.', '-'
# or ':' that follows it. Case-insensitive; anchored at the start of the string.
# NOTE(review): the call sites are outside this view; presumably used to
# normalise headings before matching them against bible locations — confirm.
_SCENE_TYPE_PREFIX_RE = re.compile(
    r'^\s*(?:INT\.?/EXT\.?|EXT\.?/INT\.?|I/E|INT\.?|EXT\.?)\b[\s.\-:]*',
    re.IGNORECASE,
)
# Matches a trailing temporal marker introduced by a hyphen, en dash or em dash
# (e.g. " - NIGHT", " — MOMENTS LATER", " - SAME TIME"). Case-insensitive;
# anchored at the end of the string.
_TIME_OF_DAY_SUFFIX_RE = re.compile(
    r'\s*[-–—]\s*(?:DAY|NIGHT|CONTINUOUS|LATER|MOMENTS\s+LATER|MORNING|EVENING|'
    r'DUSK|DAWN|SAME(?:\s+TIME)?|FLASHBACK)\s*$',
    re.IGNORECASE,
)

# Prose-author model (REC-72 D0c) — distinct from OPUS_ENRICHMENT_MODEL.
# Declared here for the D2 author stage (later); this phase only resolves it.
# SCOPE NOTE: model_roles.json-only (no model_profiles.json entry). Cross-config
# validation is currently DEFERRED (RECOIL_ENFORCE_CROSS_CONFIG unset by default,
# per model_profiles.py::_run_cross_config_validation_once docstring). When
# enforcement is enabled it ALREADY raises on 10 pre-existing text/tts/qc role
# leaves (e.g. text.extraction='opus-4.6'); adding a claude-opus-4-8 profile would
# not make it pass and is out of Phase-0 scope. prose_author follows the same
# role-map-only convention as those existing text roles.
PROSE_AUTHOR_MODEL = get_model("prose_author", "text")

# Opus enrichment model + pricing
# NOTE: unlike the role-based models above, this identifier is a hard-coded
# string rather than a get_model() lookup.
OPUS_ENRICHMENT_MODEL = "claude-opus-4-6"
# Per-million-token rates; NOTE(review): the consumers of these two constants
# are outside this view — presumably cost reporting for Stage 1.5, confirm.
OPUS_INPUT_COST_PER_M = 15.0   # $/M tokens
OPUS_OUTPUT_COST_PER_M = 75.0  # $/M tokens

# Field-type-dependent format instructions for Opus enrichment.
# Keys are bible field names (visual_description, atmosphere, wardrobe_*);
# values are the instruction text describing the expected output format for
# that field. The strings are runtime prompt text — edit with care.
# NOTE(review): the lookup site is outside this view; presumably Stage 1.5
# selects the entry by the placeholder's field name — confirm.
FORMAT_INSTRUCTIONS = {
    "visual_description": (
        "Output a dense comma-separated list of concrete physical descriptors. "
        "FACE AND BODY ONLY — no wardrobe, no accessories, no narrative prose, no metaphors. "
        "Be maximally specific: exact age range, build type, skin tone and undertone, "
        "hair texture/color/length/style, facial bone structure, eye color/shape/spacing, "
        "nose shape, mouth shape, distinguishing physical traits. "
        "Every token must be renderable by an image generation model. "
        "Example: 'late 20s, lean angular build, warm olive skin, dark brown hair cropped "
        "short with grey at temples, deep-set hazel eyes with permanent fatigue shadows, "
        "sharp cheekbones, thin deliberate mouth, prominent collarbones'"
    ),
    "atmosphere": (
        "Write 2-3 sentences of evocative cinematic prose describing the mood, "
        "light quality, sound texture, temperature, and emotional feel of this location. "
        "Write for a cinematographer scouting the space, not a novelist. "
        "Be sensory-specific: name the light source, the color temperature, "
        "what you hear when the dialogue stops, what the air feels like."
    ),
    "wardrobe_arc_thesis": (
        "Write 2-3 sentences explaining the dramatic logic of this character's wardrobe "
        "evolution across the series. What does their clothing reveal about their "
        "psychological state at each turning point? Reference specific phase transitions."
    ),
    "wardrobe_arc_vision": (
        "Write 2-3 sentences of visual direction for this character's wardrobe. "
        "Reference specific textures, color palettes, silhouettes, and real-world "
        "style influences. A costume designer should be able to pull a rack from this."
    ),
    "wardrobe_philosophy": (
        "Write 2-3 sentences of project-level wardrobe direction. "
        "What is the unifying visual language across all characters? "
        "What does clothing mean in this world? How does wardrobe serve the story's themes?"
    ),
}


# ---------------------------------------------------------------------------
# Prompts
# ---------------------------------------------------------------------------

# System prompt for Stage 0 (camera-test). This is a str.format template:
# _run_camera_test_single fills {min_shots} and {max_shots} from the pipeline's
# shot budget, so any literal brace added to this text must be doubled ({{ }}).
# Trailing backslashes join physical lines inside the literal; a rule's
# wrapped lines therefore form one logical line in the prompt the model sees.
CAMERA_TEST_SYSTEM_PROMPT = """\
You are the Lead Editor for a vertical microdrama. Your job is to perform a \
"Camera-Test" on a raw screenplay, breaking it down into exact, individual \
camera shots.

RULES:
1. ONE SHOT = ONE ARRAY ITEM.
2. SPLIT SHOTS WHEN: The subject changes, the camera scale changes \
(e.g., Wide to Close-Up), the location changes, screen direction reverses, \
or there is a time jump.
3. SUSPENSE FRAGMENTATION: In high-tension moments, single sentences or \
fragments SHOULD be their own shots (e.g., punch-ins, ticking clocks, \
sudden reactions).
4. KEEP TOGETHER: Dialogue and the action describing that dialogue belong \
in the same shot. Verbless fragments continuing the same visual beat stay \
together.
5. SHOT BUDGET: You MUST output between {min_shots} and {max_shots} shots \
for this episode. If there is too much action, consolidate minor beats \
into single continuous tracking shots. If there is too little, punch in \
for reaction shots.
6. CHARACTER DETECTION: List all character names that appear or are \
referenced in each shot's text.
7. LOCATION HINTS: If a scene heading (INT./EXT.) precedes a shot, \
include it as location_hint.
8. DIALOGUE DETECTION: Mark has_dialogue=true if the text contains \
character dialogue (a character name followed by their spoken line).
9. SCENE INDEX: Assign a scene_index (starting at 1) to each shot based \
on NARRATIVE scene boundaries. A new scene starts when: the dramatic beat \
changes, a new conversation topic begins, there's a tonal shift, or a new \
confrontation/interaction begins. Do NOT rely solely on location changes — \
multiple scenes can happen in one location (e.g., a conversation and then \
a fight in the same room are separate scenes). Consecutive shots in the \
same dramatic beat share the same scene_index."""

# System prompt for Stage 1 (breakdown pass / global bible). This is a
# str.format template: run_breakdown_pass fills {project_name} and
# {total_episodes}, so any literal brace added to this text must be doubled.
# The prompt instructs the model to emit the literal token "[OPUS_ENRICHMENT]"
# for prose fields; per the module docstring those placeholders are filled
# later by Stage 1.5.
BREAKDOWN_SYSTEM_PROMPT = """\
You are the Structural Synthesis Engine for Starsend, a cinematic rendering engine.
You are performing PASS 1 (Structural Extraction) of a 2-pass architecture \
for the project "{project_name}".

INPUTS:
1. A camera-tested screenplay containing {total_episodes} episodes.
2. The author's official Project Metadata (Character Bibles, Project Config).
3. [IF MERGING] An existing GlobalBible JSON from previous episodes.

YOUR GOAL:
Synthesize a single, canonical Global Render Bible tracking state changes \
across all {total_episodes} episodes.

CRITICAL CONSTRAINT: PASS 1 VS PASS 2 DELEGATION
You are Pass 1. You excel at timeline math, deduplication, and spatial logic. \
Opus (Pass 2) excels at evocative, cinematic prose. \
DO NOT WRITE PROSE. Follow these strict formatting rules for string fields:

- [OPUS_ENRICHMENT]: For fields requiring evocative prose \
(`visual_description`, `atmosphere`), output exactly "[OPUS_ENRICHMENT]".
- FIELD BOUNDARY RULES (critical for downstream prompt assembly):
  * `visual_description` = FACE AND BODY ONLY: age, build, skin, hair, \
facial features, scars, physical traits. NO wardrobe, accessories, props, \
or narrative text. Items worn (respirators, counters, gloves, boots) belong \
EXCLUSIVELY in `wardrobe_description`. Literary phrases ("the compact build \
of someone who survives by...") are wasted tokens for image models — use \
concrete visual descriptors only.
  * `wardrobe_description` = CLOTHING AND ACCESSORIES ONLY: everything worn \
or carried. This is the single source of truth for what the character wears. \
Do NOT duplicate wardrobe items in `visual_description`.
  * `distinguishing_marks` = permanent body features only (scars, tattoos, \
implants). NOT clothing or accessories.
- SKELETON FORMAT: For physical description fields, output ONLY a dense, \
comma-separated list of raw nouns, colors, and physical facts. No sentences.
  * `wardrobe_description`: "heavy canvas jacket, rust stains, amber wrist \
counter on left wrist, cargo pants with reinforced knees, tool belt with \
salvage hook, steel-toed boots, repair patches visible"
  * `hair_makeup`: "grime-streaked face, sweat-matted hair pulled back, \
orange cuticles from rust"
  * `distinguishing_marks`: "burn scar left cheek (ep 12), radiation veins \
at temples (ep 9-13), vine marks on wrists (ep 21+)"
  * `description` (location): "metal grating floor, rust flakes, sweating \
pipes, emergency strip lights (amber/red), maintenance shaft access, cramped \
ceiling height"
  * `description` (prop): "heavy metal hook, worn smooth handle, \
salvage-standard issue, bent 45 degrees after ep 36"

METHODOLOGY & RULES:

1. CHAIN OF THOUGHT (`structural_analysis`):
- The very first field in the JSON schema is `structural_analysis`. You MUST \
use this as a scratchpad to write out your timeline calculations, prop \
deduplication logic, and phase boundary checks BEFORE outputting the final \
arrays.

2. GROUND TRUTH & PHASE BOUNDARIES (Hypothesis vs Proof):
- The Character Bible contains "Transformation Beats". These are the intended \
phase boundaries (the hypothesis).
- CONFIRM these beats against the screenplay (the proof). REFINE the exact \
start_ep/end_ep boundaries based on action lines.
- Trigger a phase boundary exactly when the screenplay action lines dictate a \
persistent change (wardrobe shift, major trauma, time jump).
- Extract specific physical items from the screenplay to build the skeleton.

3. CHARACTER PRESENCE (Absence is Data):
- If a character does not appear in an episode, do NOT include that episode \
number in the `episodes` array.
- Characters who die or leave the story permanently will have their arrays \
end early.

4. LOCATION CONSOLIDATION & HABITAT ZONES:
- Strip temporal markers (NIGHT, CONTINUOUS, LATER).
- First, assign the location to a core `habitat_zone` (e.g., Lower Decks, \
The Root) based on context clues.
- Consolidate locations ONLY within the same habitat zone. If "INT. CORRIDOR" \
appears in Lower Decks and Upper Decks, they are two distinct location_ids.
- Record all original screenplay scene headings in the `aliases` array.

5. PROP EXTRACTION & HANDLING:
- Any recurring physical object in 2+ episodes gets a `prop_id`.
- Objects mentioned ONLY in dialogue still count if they are physical items.
- "Signature Props" listed in the Character Bible are MANDATORY extractions, \
even if they only appear in 1 episode.
- `associated_characters` must include ANY character who physically interacts \
with or holds the prop, not just the owner.
- Track damage/changes to the prop over time in the `state_notes` field.

6. LIGHTING MOTIFS (Thematic Resonance):
- Extract lighting setups that have narrative purpose, not just ambient lights.
- Triggers: Bookends (appears significantly in premiere/finale), \
Character-Tied (changes state based on character action/emotion), or \
Narrative Trigger (reacts to specific threats/events).

7. INCREMENTAL MERGE MODE:
- If an existing GlobalBible is provided in your context, DO NOT overwrite \
existing phase start/end boundaries unless new episodes explicitly contradict.
- Append new episodes to the `episodes` arrays.
- Add new `CharacterPhase` blocks if a transformation occurs in new episodes.

Output strictly valid JSON matching the provided GlobalBible schema."""

# System prompt for Stage 2 (storyboard pass). The model is asked for creative
# direction only (a CreativeEpisodeOutput: per-shot CreativeShot objects plus
# scene-keyed axis_plans); the prompt states that a deterministic Python step
# assembles the full render record around that output.
# Contains the placeholder {total_shots}. NOTE(review): the formatting call is
# outside this view — presumably str.format, in which case literal braces added
# to this text must be doubled; confirm against the storyboard stage.
STORYBOARD_SYSTEM_PROMPT = """\
You are the Starsend Storyboard Engine. For each camera-tested shot you emit ONLY \
its genuinely creative direction as a compact JSON object. A deterministic Python \
step assembles the full render record around your output — do NOT emit ids, \
per-shot character/location/prop references, dialogue, per-shot spatial staging, or \
lighting trees. (EXCEPTION: the scene-level axis_plans below MAY name characters to \
define the line of action.)

You will receive {total_shots} text blocks, each tagged with its scene index. Output ONE \
CreativeEpisodeOutput containing {total_shots} CreativeShot objects (same shot_index as \
each input block; do not merge or split) PLUS scene-keyed axis_plans (described below).

For each shot emit:
- shot_index: copy the input [SHOT N] index exactly.
- prompt_skeleton: the 5 creative lines.
  * subject_line: physical blocking / focal element — the WHAT (action, pose, gaze), \
NOT appearance or wardrobe (the bible owns appearance). Comma-separated keywords.
  * environment_line: production-design details. Comma-separated keywords.
  * action_line: frozen-frame kinetic micro-details. Comma-separated keywords.
  * motion_line: ONE or TWO full sentences of TEMPORAL action — what visibly changes \
between the first and last frame. Active present/present-progressive. MANDATORY.
  * emotion_line: emotional tone + character state. Comma-separated keywords.
- shot_type: WS/LS/FS/MS/MCU/CU/ECU/INSERT. Close emotional beats=CU/ECU; \
establishing=WS/LS; action=MS/FS; object focus=INSERT.
- camera_movement: STATIC unless the shot needs pan/tilt/tracking.
- kinetic_action: camera-artifact language, NOT emotion. \
Good: "motion blur on point of contact, dust kicked into lens". Bad: "He is scared".
- target_editorial_duration_s: quick cut=2-3, dialogue=3-5, atmospheric=4-6.
- narrative_requires_match_cut: true only if this shot needs frame-precise first+last \
frame continuity into the next.
- light_motivator: what PHYSICALLY emits the dominant light, e.g. "emergency strip \
light". Leave "" to inherit the location's default lighting.

Also emit axis_plans: a map keyed by each scene index from the [SHOT N] (scene=K) tags. \
For each scene define the 180-degree line of action:
- initial_anchor: kind (motion for a moving solo subject; dialogue/group for two-or-more \
subjects; geography/neutral otherwise), reference_direction (the travel/relationship vector \
on camera side A — left-to-right / right-to-left / toward-camera / away-from-camera; a \
NON-neutral anchor must NOT be "center"), and subjects (character names from that scene's \
chars that define the line). For any non-"neutral" kind, reference_direction MUST be lateral \
(left-to-right or right-to-left); reserve toward-camera/away-from-camera/center for kind="neutral". \
Vary reference_direction across scenes to match real blocking.
- transitions: ONLY the moments the line deliberately changes, keyed by before_shot_index \
(the shot_index at which it takes effect): intentional_jump (cross the line for a power \
shift/disorientation), re_establish (the line moved because blocking changed — also give a \
new_anchor), or neutral_pivot (an on-axis CENTER bridge that re-centers to side A of the SAME line — \
use re_establish, NOT neutral_pivot, when the line of action itself changes). EVERY transition needs a non-empty \
reason. Do NOT emit per-shot screen_direction/camera_side — Python derives those from this plan.

Output valid JSON only: a CreativeEpisodeOutput object (shots + axis_plans). No markdown, no commentary."""


def _creative_input_block(s) -> str:
    """Render one camera-tested shot as a storyboard input block.

    The header line carries the shot index plus `scene=`, `dialogue=` and
    `chars=` tags (the scene tag lets the LLM author per-scene axis_plans,
    REC-180); the shot's source text follows on the next line. Kept as a
    standalone function for behavioral testability.
    """
    tags = (
        f"scene={s.scene_index}, dialogue={s.has_dialogue}, "
        f"chars={s.characters_mentioned}"
    )
    header = f"[SHOT {s.shot_index}] ({tags})"
    return f"{header}\n{s.source_text}"


def _validate_motion_lines(plan: EpisodePlan) -> list[str]:
    """Return one error string per shot whose motion_line is missing or blank.

    A motion_line counts as present only when it is a string containing at
    least one non-whitespace character. An empty list means every shot passed.
    """

    def _is_blank(value) -> bool:
        # Non-strings (e.g. None) and whitespace-only strings both count as blank.
        return not (isinstance(value, str) and value.strip())

    return [
        f"{shot.shot_id}: prompt_skeleton.motion_line is required"
        for shot in plan.shots
        if _is_blank(shot.prompt_data.prompt_skeleton.motion_line)
    ]


# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

class IngestPipeline:
    """Four-stage render extraction pipeline (Stages 0, 1, 1.5 and 2).

    Supports two extraction backends:
      - "claude-*" roles: Uses Opus-OAuth for bible/storyboard structured output
      - "gemini-*": Uses Gemini API directly

    batch_size controls how many episodes are sent per extraction call
    (default: 5 per ADR H07).
    """

    def __init__(
        self,
        project: str,
        project_root: Path,
        min_shots: int = 28,
        max_shots: int = 41,
        dry_run: bool = False,
        extraction_model: str = DEFAULT_EXTRACTION_MODEL,
        batch_size: int = DEFAULT_BATCH_SIZE,
    ):
        """Configure the pipeline and create its per-project output directories.

        Args:
            project: Project name; also used to resolve the per-project state paths.
            project_root: Project source directory (coerced to ``Path``); the
                loader helpers read the bible, config and episode scripts from it.
            min_shots: Lower bound of the per-episode shot budget.
            max_shots: Upper bound of the per-episode shot budget.
            dry_run: Stored as ``self.dry_run``; model-calling stages check it
                and skip the call.
            extraction_model: Extraction model identifier.
            batch_size: Number of episodes sent per extraction call.

        Note:
            Construction has a filesystem side effect: the output directories
            are created (including parents) even when ``dry_run`` is true.
        """
        self.project, self.project_root = project, Path(project_root)
        self.min_shots, self.max_shots = min_shots, max_shots
        self.dry_run = dry_run
        self.extraction_model = extraction_model
        self.batch_size = batch_size
        # Gemini client is built lazily by the `client` property.
        self._client = None

        # Per-project state paths (ADR-C04); imported at call time, not module load.
        from recoil.core.paths import ProjectPaths
        visual_root = ProjectPaths.for_project(project).visual_state_dir
        self.camera_tested_dir = visual_root / "camera_tested"
        self.plans_dir = visual_root / "plans"
        # episodes_dir is an alias of plans_dir (same directory object).
        self.episodes_dir = self.plans_dir
        self.bible_path = visual_root / "global_bible.json"

        # Make sure every output directory exists before any stage writes to it.
        for out_dir in (self.camera_tested_dir, self.plans_dir, self.episodes_dir):
            out_dir.mkdir(parents=True, exist_ok=True)

    @property
    def client(self):
        """Lazy-init Gemini client."""
        if self._client is None:
            from google import genai
            api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
            if not api_key:
                raise RuntimeError("GOOGLE_API_KEY or GEMINI_API_KEY not set")
            self._client = genai.Client(api_key=api_key)
        return self._client

    # ------------------------------------------------------------------
    # File I/O helpers
    # ------------------------------------------------------------------

    def _load_episode_script(self, episode_num: int) -> str:
        """Load raw screenplay text for an episode.

        Thin wrapper: delegates to ``load_episode_script_text`` with this
        pipeline's project name and the given episode number.
        """
        return load_episode_script_text(self.project, episode_num)

    def _strip_metadata(self, text: str) -> str:
        """Compatibility wrapper for callers that still reach this method.

        Delegates unchanged to the module-level ``strip_metadata`` helper
        imported from ``episode_script``; it reads no instance state.
        """
        return strip_metadata(text)

    def _load_character_bible(self) -> str:
        """Load character bible text."""
        bible_path = self.project_root / "bible/characters.md"
        if bible_path.exists():
            return bible_path.read_text()
        return ""

    def _load_project_config(self) -> dict:
        """Load project config JSON."""
        config_path = self.project_root / "visual/project_config.json"
        if config_path.exists():
            return json.loads(config_path.read_text())
        return {}

    def _load_breakdown(self) -> dict:
        """Load existing breakdown.json for character/location data."""
        breakdown_path = self.project_root / "visual/breakdown.json"
        if breakdown_path.exists():
            return json.loads(breakdown_path.read_text())
        return {}

    def _episode_ids(self) -> list[int]:
        """Discover all episode numbers from script files."""
        episodes_dir = self.project_root / "episodes"
        if not episodes_dir.exists():
            return []
        nums = []
        for f in sorted(episodes_dir.glob("ep_*.md")):
            match = re.match(r"ep_(\d+)\.md", f.name)
            if match:
                nums.append(int(match.group(1)))
        return sorted(nums)

    def _save_json(self, path: Path, data) -> None:
        """Write ``data`` to ``path`` as indented JSON via an atomic replace.

        Objects exposing ``model_dump`` are serialized with ``model_dump_json``;
        anything else goes through ``json.dumps`` with ``default=str``. The
        payload is written to a temporary file in the destination directory,
        flushed and fsynced, then moved over ``path`` with ``os.replace``, so
        readers see either the previous file or the complete new one.

        Raises:
            Exception: Any error from writing or replacing is re-raised after a
                best-effort removal of the temporary file.
        """
        is_model = hasattr(data, "model_dump")
        payload = (
            data.model_dump_json(indent=2)
            if is_model
            else json.dumps(data, indent=2, default=str)
        )

        target_dir = path.parent
        target_dir.mkdir(parents=True, exist_ok=True)
        # The temp file must live beside the target so os.replace stays on one filesystem.
        handle, scratch = tempfile.mkstemp(dir=str(target_dir), suffix=".json")
        try:
            with os.fdopen(handle, "w", encoding="utf-8") as out:
                out.write(payload)
                out.flush()
                os.fsync(out.fileno())
            os.replace(scratch, str(path))
        except Exception:
            # Do not leave a stray temp file behind; cleanup failure is non-fatal.
            try:
                os.remove(scratch)
            except OSError:
                pass
            raise
        logger.info(f"Saved: {path}")

    def _load_camera_tested(self, episode_num: int) -> CameraTestedEpisode:
        """Load a camera-tested episode from disk."""
        path = self.camera_tested_dir / f"ep_{episode_num:03d}.json"
        if not path.exists():
            raise FileNotFoundError(f"Camera-tested file not found: {path}")
        return CameraTestedEpisode.model_validate_json(path.read_text())

    def _load_bible(self) -> GlobalBible:
        """Load the global bible from disk."""
        path = self.bible_path
        if not path.exists():
            raise FileNotFoundError(f"Global bible not found: {path}")
        return GlobalBible.model_validate_json(path.read_text())

    # ------------------------------------------------------------------
    # Gemini API call with structured output
    # ------------------------------------------------------------------

    def _call_gemini(
        self,
        model: str,
        system_prompt: str,
        user_prompt: str,
        response_schema: dict | None = None,
        temperature: float = 0.3,
        max_tokens: int = 65536,
    ) -> str | None:
        """Call Gemini, optionally enforcing a JSON response schema.

        Args:
            model: Gemini model identifier.
            system_prompt: Sent as the system instruction.
            user_prompt: Sent as the request contents.
            response_schema: When truthy, the response MIME type is set to
                ``application/json`` and the schema is attached to the request.
            temperature: Sampling temperature.
            max_tokens: Maximum number of output tokens.

        Returns:
            The response text, or ``None`` when ``self.dry_run`` is set (no
            request is made). Callers treat ``None`` as "dry run".

        Raises:
            RuntimeError: If the API key is missing (raised by ``self.client``)
                or the response carries no text.
        """
        char_count = len(system_prompt) + len(user_prompt)
        logger.info(f"Calling {model} ({char_count:,} chars input)")

        # Short-circuit BEFORE importing the SDK or building the request config,
        # so a dry run works without google-genai installed or an API key set.
        if self.dry_run:
            logger.info(f"[DRY RUN] Would call {model} ({char_count:,} chars)")
            return None

        from google.genai import types as genai_types

        config = genai_types.GenerateContentConfig(
            temperature=temperature,
            max_output_tokens=max_tokens,
            system_instruction=system_prompt,
        )

        if response_schema:
            config.response_mime_type = "application/json"
            config.response_json_schema = response_schema

        # Monotonic clock: the elapsed time cannot be skewed by wall-clock changes.
        started = time.monotonic()
        response = self.client.models.generate_content(
            model=model,
            contents=user_prompt,
            config=config,
        )
        elapsed = time.monotonic() - started

        text = response.text if hasattr(response, "text") else str(response)
        if text is None:
            # A response without text parts would otherwise crash in len() with
            # an opaque TypeError. Returning None is not an option: callers
            # interpret None as a dry run.
            raise RuntimeError(
                f"{model} returned no text after {elapsed:.1f}s "
                "(empty or blocked response)"
            )
        logger.info(f"Response: {len(text):,} chars in {elapsed:.1f}s")
        return text

    # ------------------------------------------------------------------
    # Stage 0: Camera-Test Pass
    # ------------------------------------------------------------------

    def run_camera_test(
        self,
        episode_num: int | None = None,
        episode_nums: list[int] | None = None,
    ) -> CameraTestedEpisode | list[CameraTestedEpisode] | None:
        """Stage 0: Break screenplay into shot-boundary text blocks.

        Accepts either a single episode_num (backward-compatible) or a list
        of episode_nums for batch mode. In batch mode, processes each episode
        sequentially within the batch and returns a list of results.
        """
        # Handle batch vs single mode
        if episode_nums is not None:
            results = []
            for ep in episode_nums:
                result = self._run_camera_test_single(ep)
                if result is not None:
                    results.append(result)
            return results
        elif episode_num is not None:
            return self._run_camera_test_single(episode_num)
        else:
            raise ValueError("Must provide either episode_num or episode_nums")

    def _run_camera_test_single(self, episode_num: int) -> CameraTestedEpisode | None:
        """Camera-test a single episode.

        Flow:
          1. Load the episode script and build the system/user prompts.
          2. Resolve the model; ``claude-*`` models go through
             ``call_opus_oauth``, everything else through ``_call_gemini``.
          3. Validate the response, retrying with an error-annotated prompt up
             to ``MAX_RETRIES`` more times.
          4. Save the result to ``<camera_tested_dir>/ep_NNN.json`` and stamp
             the derivation manifest.

        Returns:
            The validated episode, or ``None`` on a dry run.

        Raises:
            Exception: The last response-validation error, after the raw
                response has been sent to the review queue; also any error from
                the model call, the save, or the manifest stamp.

        Note:
            A shot-budget violation that survives all attempts is NOT fatal:
            the result is saved anyway with a warning.
        """
        logger.info(f"=== Stage 0: Camera-Test EP{episode_num:03d} ===")

        script_text = self._load_episode_script(episode_num)
        episode_id = f"EP{episode_num:03d}"

        system_prompt = CAMERA_TEST_SYSTEM_PROMPT.format(
            min_shots=self.min_shots,
            max_shots=self.max_shots,
        )

        user_prompt = f"Camera-test the following screenplay episode.\n\n{script_text}"

        model = self._resolve_model(CAMERA_TEST_MODEL)
        schema = CameraTestedEpisode.model_json_schema()

        # First attempt. The Opus-OAuth path has no dry-run handling of its own,
        # so dry_run is checked here; the Gemini path signals it by returning None.
        if model.startswith("claude-"):
            if self.dry_run:
                logger.info("[DRY RUN] Camera-test skipped")
                return None
            raw_response = call_opus_oauth(
                model,
                system_prompt,
                self._camera_test_opus_prompt(user_prompt),
                json_schema=schema,
                timeout=2400,
            )
        else:
            raw_response = self._call_gemini(
                model=model,
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                response_schema=schema,
            )

        if raw_response is None:
            logger.info("[DRY RUN] Camera-test skipped")
            return None

        # Validation loop: attempt 0 checks the response obtained above; each
        # later attempt first re-queries the model with the previous errors.
        errors = []
        for attempt in range(MAX_RETRIES + 1):
            if attempt > 0:
                retry_prompt = self._retry_prompt(user_prompt, errors)
                if model.startswith("claude-"):
                    raw_response = call_opus_oauth(
                        model,
                        system_prompt,
                        self._camera_test_opus_prompt(retry_prompt),
                        json_schema=schema,
                        timeout=2400,
                    )
                else:
                    raw_response = self._call_gemini(
                        model=model,
                        system_prompt=system_prompt,
                        user_prompt=retry_prompt,
                        response_schema=schema,
                    )

            try:
                result = self._validate_camera_test_response(raw_response, episode_num)
            except Exception as e:
                errors = [f"Pydantic validation error: {e}"]
                logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                # Out of attempts: park the raw response for human review, then fail.
                if attempt == MAX_RETRIES:
                    self._send_to_review_queue(episode_id, "camera_test", errors, raw_response)
                    raise
                continue

            # Validate shot budget
            budget_errors = validate_camera_test_budget(result, self.min_shots, self.max_shots)
            if budget_errors:
                errors = budget_errors
                logger.warning(f"Attempt {attempt + 1}: {errors}")
                # Unlike a validation failure, a persistent budget miss is tolerated.
                if attempt == MAX_RETRIES:
                    logger.warning(f"Budget violation after {MAX_RETRIES + 1} attempts, saving with warning")
                    break
                continue

            break

        # Persist to disk
        out_path = self.camera_tested_dir / f"ep_{episode_num:03d}.json"
        self._save_json(out_path, result)
        logger.info(f"Camera-test complete: {result.total_shots} shots")

        # Stamp the derivation manifest (REC-164 Phase 3). Best-effort-but-loud:
        # a manifest write failure FAILs the stage (MINOR-7).
        # NOTE(review): the stamp records CAMERA_TEST_MODEL and via="oauth"
        # unconditionally, even when _resolve_model returned a different model
        # or the Gemini branch handled the call — confirm this provenance is
        # intended before relying on it.
        try:
            derivation_manifest.stamp_stage(
                self.project,
                episode_num,
                "camera_tested",
                kind="derived",
                content_sha=content_sha(result.model_dump()),
                structural_sha=None,
                source={"script_sha": episode_script_text_sha(script_text)},
                builder="stage0.camera_test",
                model=CAMERA_TEST_MODEL,
                via="oauth",
            )
        except Exception:
            logger.error(
                "Failed to stamp camera_tested manifest for ep_%03d", episode_num,
                exc_info=True,
            )
            raise

        return result

    # ------------------------------------------------------------------
    # Stage 1: Breakdown Pass / Global Bible
    # ------------------------------------------------------------------

    def run_breakdown_pass(
        self,
        episode_nums: list[int] | None = None,
        merge: bool = False,
    ) -> GlobalBible | None:
        """Stage 1: build the Global Bible from camera-tested episodes.

        Collects the shot text of every requested episode, appends whatever
        supplementary context is available (character bible, project config,
        existing breakdown and, when ``merge`` is set, the current bible) and
        asks the resolved model for a ``GlobalBible``-shaped response. A
        response that fails validation is retried with the error fed back
        into the prompt.

        Args:
            episode_nums: Episodes to include; defaults to
                ``self._episode_ids()``. Episodes without a camera-tested
                file are skipped with a warning.
            merge: When True, the existing bible (if one can be loaded) is
                placed in the prompt so the model can merge with it and
                preserve human edits.

        Returns:
            The validated bible, or ``None`` when the pass is skipped as a
            dry run.

        Raises:
            RuntimeError: None of the requested episodes has a camera-tested
                file.
            Exception: The last validation error once every attempt has
                failed (the raw response is sent to the review queue first),
                or any error raised while stamping the bible manifest.
        """
        if episode_nums is None:
            episode_nums = self._episode_ids()

        logger.info(f"=== Stage 1: Breakdown Pass ({len(episode_nums)} episodes) ===")

        # One "[EPISODE n]" section per episode that has camera-tested output.
        episode_texts = []
        for ep_num in episode_nums:
            try:
                camera_tested = self._load_camera_tested(ep_num)
                shot_blocks = [
                    f"[SHOT {shot.shot_index}]\n{shot.source_text}"
                    for shot in camera_tested.shots
                ]
                episode_texts.append(f"[EPISODE {ep_num}]\n" + "\n\n".join(shot_blocks))
            except FileNotFoundError:
                logger.warning(f"No camera-tested file for EP{ep_num:03d}, skipping")

        if not episode_texts:
            raise RuntimeError("No camera-tested episodes found. Run camera-test first.")

        # Supplementary context; each piece is omitted from the prompt when falsy.
        char_bible = self._load_character_bible()
        project_config = self._load_project_config()
        breakdown = self._load_breakdown()

        # The current bible only takes part when the caller asked for a merge.
        existing_bible = None
        if merge:
            try:
                existing_bible = self._load_bible()
                logger.info(f"Merging with existing bible: {len(existing_bible.characters)} chars, {len(existing_bible.locations)} locs")
            except FileNotFoundError:
                logger.info("No existing bible found, creating fresh")

        model = self._resolve_model(BREAKDOWN_MODEL)
        system_prompt = BREAKDOWN_SYSTEM_PROMPT.format(
            project_name=self.project,
            total_episodes=len(episode_nums),
        )

        # Assemble the user prompt section by section.
        sections = [
            "# SCREENPLAY EPISODES (camera-tested)\n",
            "\n\n---\n\n".join(episode_texts),
        ]
        if char_bible:
            sections.append("\n\n# CHARACTER BIBLE\n\n" + char_bible)
        if project_config:
            sections.append(
                "\n\n# PROJECT CONFIG\n\n" + json.dumps(project_config, indent=2)
            )
        if breakdown and "characters" in breakdown:
            # Only the visual/wardrobe subset of each breakdown character is forwarded.
            wardrobe_view = {
                char_id: {
                    "display_name": info.get("display_name", char_id),
                    "visual_description": info.get("visual_description", ""),
                    "wardrobe": info.get("wardrobe", {}),
                }
                for char_id, info in breakdown["characters"].items()
            }
            sections.append(
                "\n\n# EXISTING BREAKDOWN (wardrobe phases)\n\n"
                + json.dumps(wardrobe_view, indent=2)
            )
        if existing_bible:
            sections.append(
                "\n\n# EXISTING GLOBAL BIBLE (merge with this — preserve human edits)\n\n"
                + existing_bible.model_dump_json(indent=2)
            )

        user_prompt = "\n".join(sections)
        schema = GlobalBible.model_json_schema()
        uses_opus = model.startswith("claude-")

        def _request(prompt: str):
            """Send one prompt to whichever backend ``model`` selects."""
            if uses_opus:
                return call_opus_oauth(
                    model,
                    system_prompt,
                    prompt,
                    json_schema=schema,
                    timeout=2400,
                )
            return self._call_gemini(
                model=model,
                system_prompt=system_prompt,
                user_prompt=prompt,
                response_schema=schema,
                max_tokens=65536,
            )

        # The Opus lane checks dry_run here; a None response from the call
        # below is treated as a dry-run skip as well.
        if uses_opus and self.dry_run:
            logger.info("[DRY RUN] Breakdown pass skipped")
            return None

        raw_response = _request(user_prompt)
        if raw_response is None:
            logger.info("[DRY RUN] Breakdown pass skipped")
            return None

        # Attempt 0 validates the response above; later attempts re-prompt
        # with the previous validation error attached.
        errors = []
        for attempt in range(MAX_RETRIES + 1):
            if attempt > 0:
                raw_response = _request(self._retry_prompt(user_prompt, errors))

            try:
                result = self._validate_breakdown_response(raw_response)
            except Exception as exc:
                errors = [f"Pydantic validation error: {exc}"]
                logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                if attempt < MAX_RETRIES:
                    continue
                # Out of attempts: park the raw response for review, then fail.
                self._send_to_review_queue("global_bible", "breakdown", errors, raw_response)
                raise
            else:
                break

        # Persist
        self._save_json(self.bible_path, result)
        logger.info(f"Bible complete: {len(result.characters)} chars, {len(result.locations)} locs, {len(result.props)} props")

        # Stamp the PROJECT-LEVEL bible record (series-shared, MAJOR-3 — NOT a
        # per-episode stage entry). REC-164 Phase 3, best-effort-but-loud:
        # a stamping failure is logged and then re-raised.
        try:
            derivation_manifest.stamp_bible(
                self.project,
                content_sha=content_sha(result.model_dump()),
                builder="stage1.breakdown",
                built_at=datetime.now(timezone.utc).isoformat(),
            )
        except Exception:
            logger.error(
                "Failed to stamp bible manifest (breakdown) for %s", self.project,
                exc_info=True,
            )
            raise

        return result

    # ------------------------------------------------------------------
    # Stage 1.5: Validate Global Bible (post-Gemini deterministic checks)
    # ------------------------------------------------------------------

    def validate_global_bible(self, bible: GlobalBible | None = None) -> list[str]:
        """Run deterministic consistency checks over a GlobalBible.

        Checks performed:
        1. Phase continuity: every phase has end_ep >= start_ep.
        2. Phase ordering: a phase may not start before the previous phase in
           the list ends (phases[i+1].start_ep >= phases[i].end_ep; sharing
           the boundary episode is accepted).
        3. Episode monotonicity: character and prop ``episodes`` lists are
           sorted and free of duplicates.
        4. Orphaned references: characters named by props, and locations and
           characters named by lighting motifs, exist in the bible.
        5. Hex colors: every location ``color_palette`` entry is exactly
           ``#RRGGBB``.
        6. Field boundaries: ``visual_description`` holds no wardrobe terms and
           no narrative phrasing (plain substring match on lowercased text).

        Args:
            bible: Bible to check; loaded via ``self._load_bible()`` when omitted.

        Returns:
            One human-readable string per problem found; empty when the bible
            is clean. The same messages are written to the log.
        """
        if bible is None:
            bible = self._load_bible()

        errors: list[str] = []

        for char_id, char in bible.characters.items():
            # Phase continuity
            for phase in char.phases:
                if phase.end_ep < phase.start_ep:
                    errors.append(
                        f"{char_id}: phase '{phase.phase_id}' has end_ep={phase.end_ep} < start_ep={phase.start_ep}"
                    )

            # Phase overlap: compare each phase with its successor in list order.
            for curr, nxt in zip(char.phases, char.phases[1:]):
                if nxt.start_ep < curr.end_ep:
                    errors.append(
                        f"{char_id}: phase '{nxt.phase_id}' starts at ep {nxt.start_ep} "
                        f"but previous phase '{curr.phase_id}' ends at ep {curr.end_ep}"
                    )

            # Episode monotonicity
            if char.episodes != sorted(set(char.episodes)):
                errors.append(f"{char_id}: episodes array not sorted or has duplicates")

        # Orphaned references in props
        for prop_id, prop in bible.props.items():
            for assoc_char in prop.associated_characters:
                if assoc_char not in bible.characters:
                    errors.append(
                        f"prop '{prop_id}': associated_character '{assoc_char}' not in bible.characters"
                    )
            if prop.episodes != sorted(set(prop.episodes)):
                errors.append(f"prop '{prop_id}': episodes array not sorted or has duplicates")

        # Orphaned references in lighting motifs
        for motif in bible.lighting_motifs:
            for loc_id in motif.associated_locations:
                if loc_id not in bible.locations:
                    errors.append(
                        f"motif '{motif.motif_id}': location '{loc_id}' not in bible.locations"
                    )
            for char_id in motif.associated_characters:
                if char_id not in bible.characters:
                    errors.append(
                        f"motif '{motif.motif_id}': character '{char_id}' not in bible.characters"
                    )

        # Hex code validation. fullmatch is used instead of match with "^...$"
        # because "$" also matches just before a trailing newline, which would
        # let a value such as "#FFFFFF" + newline slip through.
        hex_pattern = re.compile(r"#[0-9A-Fa-f]{6}")
        for loc_id, loc in bible.locations.items():
            for color in loc.color_palette:
                if not (isinstance(color, str) and hex_pattern.fullmatch(color)):
                    errors.append(
                        f"location '{loc_id}': invalid hex color '{color}'"
                    )

        # ── Field boundary checks: wardrobe bleed into visual_description ──
        # Both marker sets are matched as plain substrings of the lowercased
        # description, so e.g. "boot" also flags "boots".
        wardrobe_markers = (
            "respirator", "glove", "boot", "jacket", "pants", "shirt",
            "belt", "holster", "vest", "coat", "shoes", "counter",
            "wearing", "wears", "draped", "hung", "strapped",
        )
        # Literary/narrative phrasing (sentences with no visual value)
        narrative_markers = (
            "someone who", "the kind of", "as if", "suggests",
            "speaks to", "the weapon", "a world of",
        )
        for char_id, char in bible.characters.items():
            visual_low = (char.visual_description or "").lower()
            if not visual_low:
                continue
            hits = [m for m in wardrobe_markers if m in visual_low]
            if hits:
                errors.append(
                    f"{char_id}: visual_description contains wardrobe terms "
                    f"{hits} — these belong in wardrobe_description only"
                )
            narr_hits = [m for m in narrative_markers if m in visual_low]
            if narr_hits:
                errors.append(
                    f"{char_id}: visual_description contains narrative text "
                    f"{narr_hits} — use concrete visual descriptors only"
                )

        if errors:
            logger.warning(f"Bible validation: {len(errors)} errors found")
            for err in errors:
                logger.warning(f"  - {err}")
        else:
            logger.info("Bible validation: CLEAN (0 errors)")

        return errors

    # ------------------------------------------------------------------
    # Stage 2: Storyboard Pass
    # ------------------------------------------------------------------

    def run_storyboard_pass(self, episode_num: int, bible: GlobalBible | None = None) -> EpisodePlan | None:
        """Stage 2: Generate per-episode render plan from camera-tested + bible.

        Flow:
        1. Load the camera-tested episode (and the bible when not supplied)
           and warn about unfilled enrichment placeholders.
        2. Ask STORYBOARD_MODEL for creative output and run it through the
           validation chain (creative parse, axis plans, plan assembly,
           motion lines, bible references), re-prompting with the errors up
           to MAX_RETRIES extra times.
        3. Post-process the plan as a dict: source hashes, per-shot routing,
           IP1 critic or sterility gate, merge with a prior plan file, plan
           overrides, advisory spatial validation.
        4. Save the plan JSON and stamp the derivation manifest.

        Final-attempt policy differs per check: creative-parse, assembly and
        motion-line failures are sent to the review queue and raised, whereas
        axis-plan and bible-reference failures are logged and the plan is
        saved anyway.

        Args:
            episode_num: Episode whose camera-tested file is turned into a plan.
            bible: Bible to plan against; loaded via ``self._load_bible()``
                when omitted.

        Returns:
            The assembled ``EpisodePlan`` as it stood before the dict-level
            merge/override steps (the persisted JSON may therefore differ),
            or ``None`` when the pass is skipped as a dry run.

        Raises:
            ValueError: Motion-line validation still fails on the final attempt.
            Exception: The last creative-validation or assembly error on the
                final attempt, or any error raised while stamping the manifest.
        """
        episode_id = f"EP{episode_num:03d}"
        logger.info(f"=== Stage 2: Storyboard Pass {episode_id} ===")

        ct = self._load_camera_tested(episode_num)
        if bible is None:
            bible = self._load_bible()

        # Guard: warn if bible has unfilled enrichment placeholders
        if bible is not None:
            bible_json = bible.model_dump() if hasattr(bible, 'model_dump') else (
                bible.dict() if hasattr(bible, 'dict') else bible
            )
            if isinstance(bible_json, dict):
                unfilled = self._find_placeholders(bible_json)
                if unfilled:
                    paths_preview = ", ".join(p for p, _ in unfilled[:5])
                    more = f" (+{len(unfilled) - 5} more)" if len(unfilled) > 5 else ""
                    logger.warning(
                        f"Bible contains {len(unfilled)} unfilled [OPUS_ENRICHMENT] placeholders: "
                        f"{paths_preview}{more}. Plan quality may be degraded."
                    )

        model = STORYBOARD_MODEL  # creative-only pass runs on the free Opus OAuth lane

        system_prompt = STORYBOARD_SYSTEM_PROMPT.format(
            total_shots=ct.total_shots,
            episode_num=episode_num,
        )

        shots_text = "\n\n".join(_creative_input_block(s) for s in ct.shots)

        bible_summary = self._summarize_bible_for_episode(bible, episode_num)

        user_prompt = (
            f"# CAMERA-TESTED EPISODE {episode_id}\n\n"
            f"Total shots: {ct.total_shots}\n\n"
            f"{shots_text}\n\n"
            f"# GLOBAL BIBLE REFERENCE\n\n{bible_summary}"
        )

        schema = CreativeEpisodeOutput.model_json_schema()

        if model.startswith("claude-"):
            if self.dry_run:
                logger.info("[DRY RUN] Storyboard pass skipped")
                return None

            opus_user_prompt = self._storyboard_opus_prompt(user_prompt)
            raw_response = call_opus_oauth(
                model,
                system_prompt,
                opus_user_prompt,
                json_schema=schema,
                timeout=2400,
            )
        else:
            raw_response = self._call_gemini(
                model=model,
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                response_schema=schema,
                max_tokens=65536,
            )

        # A None response is treated as a dry-run skip.
        if raw_response is None:
            logger.info("[DRY RUN] Storyboard pass skipped")
            return None

        # Attempt 0 validates the response obtained above; every later attempt
        # re-prompts with the errors of the check that failed last.
        errors = []
        for attempt in range(MAX_RETRIES + 1):
            if attempt > 0:
                retry_prompt = self._retry_prompt(user_prompt, errors)
                if model.startswith("claude-"):
                    raw_response = call_opus_oauth(
                        model,
                        system_prompt,
                        self._storyboard_opus_prompt(retry_prompt),
                        json_schema=schema,
                        timeout=2400,
                    )
                else:
                    raw_response = self._call_gemini(
                        model=model,
                        system_prompt=system_prompt,
                        user_prompt=retry_prompt,
                        response_schema=schema,
                        max_tokens=65536,
                    )

            try:
                creative = self._validate_creative_response(raw_response, episode_num)
            except Exception as e:
                # REC-180 final-attempt graceful degrade: a malformed axis_plans field (bad enum,
                # bad shape, non-coercible key) makes the WHOLE CreativeEpisodeOutput fail to parse
                # — strict by design. Rather than cap the episode on a single bad axis value, on the
                # final attempt drop axis_plans and re-parse; if the rest is well-formed, proceed
                # with neutral spatial derivation (honoring "malformed present plans never cap").
                recovered = None
                if attempt == MAX_RETRIES:
                    try:
                        extracted = self._extract_first_json_object(raw_response) or raw_response
                        obj = json.loads(extracted)
                        if isinstance(obj, dict) and "axis_plans" in obj:
                            obj["axis_plans"] = {}
                            recovered = self._validate_creative_response(json.dumps(obj), episode_num)
                    except Exception:
                        # Recovery is best-effort; fall through to the normal failure path.
                        recovered = None
                if recovered is not None:
                    logger.warning(
                        "Attempt %s: axis_plans unparseable (%s) — dropped; proceeding with neutral "
                        "spatial derivation rather than capping the episode", attempt + 1, e,
                    )
                    creative = recovered
                else:
                    errors = [f"Creative-output validation error: {e}"]
                    logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                    if attempt == MAX_RETRIES:
                        self._send_to_review_queue(episode_id, "storyboard", errors, raw_response)
                        raise
                    continue

            # REC-180: validate the LLM-authored scene axis plans. Retry pressure on non-final
            # attempts; on final attempt DEGRADE (drop invalid plans, warn, proceed) — a
            # missing/invalid axis plan is recoverable (propagate_axis neutral-fallbacks).
            from orchestrator.axis_validation import validate_axis_plans, sanitize_axis_plans
            axis_errors = validate_axis_plans(creative, ct)
            if axis_errors:
                if attempt < MAX_RETRIES:
                    errors = axis_errors
                    logger.warning(f"Attempt {attempt + 1}: {len(axis_errors)} axis_plan validation errors")
                    continue
                dropped = sanitize_axis_plans(creative, ct)
                logger.warning(
                    "axis_plan validation failed on final attempt; dropped invalid scene plans %s "
                    "(valid plans apply; dropped/missing scenes neutral-fallback) — first errors: %s",
                    dropped, "; ".join(axis_errors[:5]),
                )
                axis_errors = []  # consumed: do not let stale axis errors leak into later messaging

            try:
                result = self._assemble_plan_from_creative(creative, ct, bible, episode_num)
            except Exception as e:
                # Assembly (which now also runs propagate_axis, REC-180) is part of the retry
                # contract: retry on non-final attempts, escalate+raise only on the final one,
                # rather than aborting the pass on a transient/parseable-but-unassemblable output.
                errors = [f"Creative assembly error: {e}"]
                logger.warning(f"Attempt {attempt + 1}: {errors[0]}")
                if attempt == MAX_RETRIES:
                    self._send_to_review_queue(episode_id, "storyboard", errors, raw_response)
                    raise
                continue

            motion_line_errors = _validate_motion_lines(result)
            if motion_line_errors:
                errors = motion_line_errors
                logger.warning(f"Attempt {attempt + 1}: {len(errors)} motion_line validation errors")
                if attempt == MAX_RETRIES:
                    self._send_to_review_queue(episode_id, "storyboard", errors, raw_response)
                    raise ValueError("; ".join(errors))
                continue

            # Validate against bible. Unlike the checks above, errors that
            # survive the final attempt are only logged; the plan is still saved.
            bible_errors = validate_plan_against_bible(result, bible)
            if bible_errors:
                errors = bible_errors
                logger.warning(f"Attempt {attempt + 1}: {len(errors)} bible validation errors")
                if attempt == MAX_RETRIES:
                    logger.warning(f"Bible validation errors after {MAX_RETRIES + 1} attempts, saving with warnings")
                    for e in errors[:10]:
                        logger.warning(f"  {e}")
                    break
                continue

            break

        # ── Post-extraction: routing + hashing + merge (ADR-R04, R05, R08) ──

        # Compute source_hash for stale detection
        raw_script = self._load_episode_script(episode_num)
        result_dict = result.model_dump(mode="json")
        result_dict["source_hash"] = hashlib.md5(raw_script.encode("utf-8")).hexdigest()

        # Route each shot + compile prompts
        from orchestrator.execution_plan import _determine_pipeline, _determine_model
        bible_dict = bible.model_dump(mode="json") if hasattr(bible, "model_dump") else dict(bible)

        shots = result_dict.get("shots", [])
        for shot_pos, shot_dict in enumerate(shots):
            routing = shot_dict.get("routing_data", {})
            pipeline = _determine_pipeline(routing)
            shot_dict["pipeline"] = pipeline
            shot_dict["model"] = _determine_model(pipeline, routing)

            # Per-shot source text hash for merge tracking
            source_text = shot_dict.get("source_text", "")
            shot_dict["source_text_hash"] = hashlib.md5(
                source_text.encode("utf-8")
            ).hexdigest()

            # JIT: No longer bake compiled_prompts at Stage 2.
            # Prompts are compiled at generation time from live bible data.
            # Run IP1 Plan Pass Critic (subsumes sterility gate + adds 4 more dimensions).
            try:
                from recoil.core.paths import get_config
                _cfg = get_config()
                if _cfg.get("critic_flags", {}).get("ip1_plan_pass", False):
                    from recoil.pipeline._lib.critics.plan_pass_critic import PlanPassCritic
                    _pool_dir = self.episodes_dir.parent  # projects/{project}/state/visual
                    _critic = PlanPassCritic(
                        bible=bible_dict,
                        all_shots=shots,
                        max_attempts=2,
                        experience_pool_dir=_pool_dir,
                        shot_id=shot_dict.get("shot_id", ""),
                    )
                    shot_dict, _cr = _critic.run(shot_dict)
                    if not _cr.passed:
                        logger.warning(
                            f"IP1 critic: {shot_dict.get('shot_id', '?')} — "
                            f"{[d.name for d in _cr.hard_failures]}"
                        )
                    elif _cr.soft_failures:
                        logger.info(
                            f"IP1 critic advisory: {shot_dict.get('shot_id', '?')} — "
                            f"{[d.name for d in _cr.soft_failures]}"
                        )
                else:
                    # Fallback: original sterility gate when critic flag is off
                    from recoil.pipeline._lib.jit_prompt import validate_sterility, auto_fix_tokens
                    shot_dict = auto_fix_tokens(shot_dict, bible_dict)
                    violations = validate_sterility(shot_dict, bible_dict)
                    if violations:
                        logger.warning(
                            f"Sterility gate: {shot_dict.get('shot_id', '?')} — "
                            f"{'; '.join(violations)}"
                        )
            except Exception as e:
                # Graceful degradation: if critic fails, try original sterility gate.
                # The triggering error is logged so the degradation is visible.
                logger.warning(
                    f"Plan critic / sterility gate raised for {shot_dict.get('shot_id', '?')}: {e} "
                    "— falling back to sterility gate"
                )
                try:
                    from recoil.pipeline._lib.jit_prompt import validate_sterility, auto_fix_tokens
                    shot_dict = auto_fix_tokens(shot_dict, bible_dict)
                    violations = validate_sterility(shot_dict, bible_dict)
                    if violations:
                        logger.warning(
                            f"Sterility gate (fallback): {shot_dict.get('shot_id', '?')} — "
                            f"{'; '.join(violations)}"
                        )
                except Exception as e2:
                    logger.warning(f"Sterility gate failed for {shot_dict.get('shot_id', '?')}: {e2}")

            # Rebinding the loop variable does not touch the list, so store the
            # (possibly replaced) shot back. This is a no-op when the critic /
            # auto-fix helpers mutate in place and return the same dict, and it
            # keeps their output when they return a new one.
            shots[shot_pos] = shot_dict

        # Merge with prior plan if it exists (preserve human edits)
        out_path = self.episodes_dir / f"ep_{episode_num:03d}_plan.json"
        if out_path.exists():
            try:
                old_plan = json.loads(out_path.read_text(encoding="utf-8"))
                result_dict = merge_with_prior_plan(old_plan, result_dict)
                logger.info("Merged with prior plan (preserved human overrides)")
            except Exception as e:
                logger.warning(f"Merge with prior plan failed: {e}")

        # Apply stored plan overrides; each flag returned is surfaced as a warning.
        overrides = plan_overrides.load_overrides(self.project, episode_num)
        live_spans = {
            s["shot_id"]: s.get("source_text_hash")
            for s in result_dict.get("shots", [])
        }
        result_dict, flags = plan_overrides.apply_overrides(result_dict, overrides, live_spans)
        for flag in flags:
            logger.warning(
                "Plan override flagged for %s: %s",
                flag.get("shot_id"),
                flag.get("reason"),
            )

        # Spatial validation — advisory only (see auto_correct=False below)
        try:
            from orchestrator.spatial_validation import validate_spatial_data
            # REC-180: screen_direction is now DERIVED by propagate_axis; run spatial validation
            # in ADVISORY mode (auto_correct=False) so it cannot overwrite the deterministic
            # projection. (This also makes its toward-camera/CU normalizations warn-only — an
            # accepted v1 tradeoff; validate_axis_plans requires non-neutral anchors to be
            # LATERAL, so the projector emits toward-camera/away-from-camera ONLY from a
            # neutral-kind anchor with an explicit toward/away reference — a deliberate choice.)
            sv_result = validate_spatial_data(result_dict, auto_correct=False)
            if sv_result["corrections"]:
                for c in sv_result["corrections"]:
                    logger.info(
                        "  [SPATIAL FIX] %s: %s → %s (%s)",
                        c["shot_id"], c["old"], c["new"], c["reason"],
                    )
            if sv_result["warnings"]:
                for w in sv_result["warnings"]:
                    logger.warning("  [SPATIAL WARN] %s: %s", w["shot_id"], w["message"])
        except Exception as e:
            logger.warning(f"Spatial validation failed (non-fatal): {e}")

        # Persist
        self._save_json(out_path, result_dict)
        logger.info(f"Plan complete: {result.total_shots} shots")

        # Stamp the derivation manifest (REC-164 Phase 3). Best-effort-but-loud:
        # a manifest write failure FAILs the stage (MINOR-7). The plan's source
        # keys on the previously-stamped camera_tested content + project bible.
        try:
            derivation_manifest.stamp_stage(
                self.project,
                episode_num,
                "plan",
                kind="derived",
                structural_sha=plan_structural_sha(result_dict),
                content_sha=content_sha(result_dict),
                source={
                    "camera_tested_content_sha": (
                        derivation_manifest.load(self.project, episode_num)["stages"]
                        .get("camera_tested", {})
                        .get("content_sha")
                    ),
                    "bible_content_sha": derivation_manifest.load_bible(self.project).get("content_sha"),
                },
                builder="stage2.plan",
                model=STORYBOARD_MODEL,
                via="oauth",
                extra={"shot_ids": [s["shot_id"] for s in result_dict["shots"]]},
            )
        except Exception:
            logger.error(
                "Failed to stamp plan manifest for ep_%03d", episode_num,
                exc_info=True,
            )
            raise

        return result

    def _load_project_config(self) -> dict:
        """Load project_config.json from the Recoil project directory."""
        config_path = self.project_root / "visual" / "project_config.json"
        if config_path.exists():
            return json.loads(config_path.read_text(encoding="utf-8"))
        return {}

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _resolve_model(self, gemini_model: str) -> str:
        """Return the configured extraction model.

        If extraction_model is set to a Gemini variant, use that directly.
        If it's 'opus-4.6', the caller should dispatch via sub-agent skill
        instead. For Gemini API calls, this returns the gemini_model fallback.
        """
        if self.extraction_model.startswith("gemini"):
            return self.extraction_model
        # For opus, fall back to the Gemini model for API calls
        # (sub-agent dispatch happens at the orchestrator level)
        return gemini_model

    def _summarize_bible_for_episode(self, bible: GlobalBible, episode_num: int) -> str:
        """Create a focused bible summary for a specific episode."""
        parts = []

        # Characters active in this episode
        parts.append("## Characters\n")
        for char_id, char in bible.characters.items():
            if episode_num in char.episodes or not char.episodes:
                phase = char.phase_for_episode(episode_num)
                phase_desc = f" [{phase.phase_id}]: {phase.wardrobe_description}" if phase else ""
                parts.append(f"- **{char_id}** ({char.display_name}): {char.visual_description}{phase_desc}")

        # Locations
        parts.append("\n## Locations\n")
        for loc_id, loc in bible.locations.items():
            parts.append(f"- **{loc_id}**: {loc.description}")

        # Props
        parts.append("\n## Props\n")
        for prop_id, prop in bible.props.items():
            parts.append(f"- **{prop_id}**: {prop.description}")

        return "\n".join(parts)

    @staticmethod
    def _canonical_char_id(name: str, bible: GlobalBible) -> str | None:
        """Resolve a camera-test character mention to the bible's canonical key."""
        target = name.strip().casefold()
        if not target:
            return None

        for char_id, char in bible.characters.items():
            if target == char_id.strip().casefold():
                return char_id
            if target == char.display_name.strip().casefold():
                return char_id

            aliases = getattr(char, "aliases", None) or []
            for alias in aliases:
                if target == str(alias).strip().casefold():
                    return char_id

        return None

    @staticmethod
    def _canonicalize_heading(raw: str | None) -> str:
        """Reduce a scene heading to the form used for location matching.

        Steps, in order: trim; remove whatever ``_SCENE_TYPE_PREFIX_RE``
        matches (the leading INT./EXT. scene-type token); remove whatever
        ``_TIME_OF_DAY_SUFFIX_RE`` matches (a trailing ' - DAY/NIGHT/...'
        segment — known time-of-day tokens only, never a meaningful location
        suffix); collapse whitespace runs to single spaces; trim; casefold.
        ``None`` is treated as an empty heading, and '' is returned when
        nothing remains.
        """
        heading = (raw or "").strip()
        for pattern in (_SCENE_TYPE_PREFIX_RE, _TIME_OF_DAY_SUFFIX_RE):
            heading = pattern.sub("", heading)
        collapsed = re.sub(r"\s+", " ", heading)
        return collapsed.strip().casefold()

    @staticmethod
    def _resolve_location_id(location_hint: str, bible: GlobalBible) -> str:
        """Find the bible location_id that a scene heading refers to.

        The heading (expected to be carry-forward-filled already) and every
        candidate — each location's id, then its aliases — are compared in
        canonicalized form. There is deliberately no default: an unmatched
        heading is an error, NEVER the first location.

        Raises:
            ValueError: The bible has no locations at all.
            LocationUnresolvedError: The heading canonicalizes to '' or
                matches no location id or alias.
        """
        if not bible.locations:
            raise ValueError("bible has no locations; cannot resolve location_id")

        canonical = IngestPipeline._canonicalize_heading
        wanted = canonical(location_hint)
        if not wanted:
            raise LocationUnresolvedError(
                f"empty location heading {location_hint!r} cannot be resolved "
                "(carry-forward should have filled it upstream)",
                hint=location_hint,
            )

        for loc_id, loc in bible.locations.items():
            # The id is tried first; aliases are only consulted when it misses.
            if wanted == canonical(loc_id) or any(
                wanted == canonical(alias) for alias in loc.aliases
            ):
                return loc_id

        raise LocationUnresolvedError(
            f"location heading {location_hint!r} did not resolve to any bible "
            f"location (known: {list(bible.locations)}); add the heading to that "
            "location's bible aliases or fix the screenplay heading — NOT defaulting",
            hint=location_hint,
        )

    @staticmethod
    def _carry_forward_location_hints(ct: CameraTestedEpisode) -> dict[int, str]:
        """Walk camera-tested shots in shot-index order; each null/blank
        location_hint inherits the most-recent heading. A headingless FIRST shot
        (no prior heading to inherit) aborts fail-loud (D4)."""
        effective: dict[int, str] = {}
        current: str | None = None
        for shot in sorted(ct.shots, key=lambda s: s.shot_index):
            hint = (shot.location_hint or "").strip()
            if hint:
                current = hint
            if current is None:
                raise LocationUnresolvedError(
                    f"shot {shot.shot_index} opens the episode with no scene heading "
                    "and no prior heading to inherit; add an INT./EXT. heading to the "
                    "screenplay — NOT defaulting",
                    shot_index=shot.shot_index,
                )
            effective[shot.shot_index] = current
        return effective

    def _assemble_plan_from_creative(
        self,
        creative: CreativeEpisodeOutput,
        ct: CameraTestedEpisode,
        bible: GlobalBible,
        episode_num: int,
    ) -> EpisodePlan:
        """Assemble a full EpisodePlan from minimal creative LLM output.

        The creative shots must map 1:1 onto the camera-tested shots by
        ``shot_index``. For each shot this method:

        * resolves ``characters_mentioned`` to bible character ids (unresolved
          tokens are dropped with a warning) and attaches the wardrobe phase
          the bible reports for ``episode_num``;
        * resolves the carry-forward-filled location heading to a bible
          ``location_id``;
        * builds a single-source ``Lighting`` from the location's lighting
          profile, preferring the creative ``light_motivator`` when present;
        * emits a ``ShotRecord`` whose remaining fields are fixed defaults
          (neutral emotion, centered/in-frame characters, "50mm" focal length,
          ``TimeOfDay.INTERIOR``, ``VisualMode.REALITY``, no props, empty audio).

        After the plan is built, ``propagate_axis`` materializes per-shot
        spatial data and every ``degenerate_variation`` finding is logged as a
        warning.

        Args:
            creative: Minimal per-shot creative output from the storyboard LLM.
            ct: Camera-tested episode the creative output must mirror.
            bible: Global bible used for character/location resolution.
            episode_num: Episode number (used for shot ids and wardrobe phase).

        Returns:
            The assembled EpisodePlan.

        Raises:
            ValueError: creative indices contain extras, duplicates, or gaps
                relative to the camera-tested shots, or a shot's mentioned
                characters all fail to resolve.
            LocationUnresolvedError: a heading cannot be carried forward or
                resolved; ``shot_index`` is filled in when missing.
        """
        ct_by_index = {shot.shot_index: shot for shot in ct.shots}
        ct_indices = set(ct_by_index)

        # One pass over the creative indices collects both the distinct set and
        # the repeated values (previously a list.count() call per index).
        seen_indices: set[int] = set()
        repeated_indices: set[int] = set()
        for creative_shot in creative.shots:
            if creative_shot.shot_index in seen_indices:
                repeated_indices.add(creative_shot.shot_index)
            else:
                seen_indices.add(creative_shot.shot_index)

        extra_indices = sorted(seen_indices - ct_indices)
        if extra_indices:
            raise ValueError(f"creative shot_index {extra_indices[0]} has no camera-tested match")
        duplicate_indices = sorted(repeated_indices)
        if duplicate_indices:
            raise ValueError(f"creative shot_index duplicates: {duplicate_indices}")
        missing_indices = sorted(ct_indices - seen_indices)
        if missing_indices:
            raise ValueError(f"creative output missing camera-tested shot_index values: {missing_indices}")

        shot_records: list[ShotRecord] = []
        effective_hints = self._carry_forward_location_hints(ct)

        for c in sorted(creative.shots, key=lambda shot: shot.shot_index):
            # The validation above guarantees every creative index has a
            # camera-tested counterpart, so this lookup cannot miss.
            cts = ct_by_index[c.shot_index]

            characters: list[ShotCharacter] = []
            dropped: list[str] = []
            for name in cts.characters_mentioned:
                cid = self._canonical_char_id(name, bible)
                if cid is None:
                    dropped.append(name)
                    continue

                phase = bible.characters[cid].phase_for_episode(episode_num)
                wardrobe_phase_id = phase.phase_id if phase else ""
                characters.append(
                    ShotCharacter(
                        char_id=cid,
                        wardrobe_phase_id=wardrobe_phase_id,
                        emotion_keyword="neutral",
                        screen_position=ScreenPosition.CENTER,
                        visibility=Visibility.IN_FRAME,
                    )
                )

            # Mentions that resolve to nobody would silently become an
            # environment-only shot; treat that as a hard error instead.
            if cts.characters_mentioned and not characters:
                raise ValueError(
                    f"shot {c.shot_index}: characters_mentioned={cts.characters_mentioned!r} "
                    f"resolved to ZERO bible characters (bible keys={list(bible.characters)}); "
                    "canonicalization gap — do not silently emit an env-only shot"
                )
            if dropped:
                logger.warning(
                    "shot %s: dropped unresolved characters_mentioned tokens %r",
                    c.shot_index,
                    dropped,
                )

            try:
                location_id = self._resolve_location_id(effective_hints[c.shot_index], bible)
            except LocationUnresolvedError as exc:
                # Attach the failing shot so the caller can report it.
                if exc.shot_index is None:
                    exc.shot_index = c.shot_index
                raise
            num_characters = sum(1 for ch in characters if ch.visibility == Visibility.IN_FRAME)

            loc = bible.locations.get(location_id)
            if loc and loc.lighting_profile:
                light_source = LightSource(
                    motivator=c.light_motivator or loc.lighting_profile.primary_source,
                    direction=loc.lighting_profile.direction,
                    quality=loc.lighting_profile.quality,
                    color_temp=loc.lighting_profile.color_temp,
                    intensity=LightIntensity.MODERATE,
                )
            else:
                light_source = LightSource(motivator=c.light_motivator or "ambient practical light")
            lighting = Lighting(dominant_source_index=0, sources=[light_source])

            shot_records.append(
                ShotRecord(
                    shot_id=f"EP{episode_num:03d}_SH{c.shot_index:02d}",
                    shot_index=c.shot_index,
                    scene_index=cts.scene_index,
                    source_text=cts.source_text,
                    origin="script_derived",
                    routing_data=RoutingData(
                        num_characters=num_characters,
                        is_env_only=num_characters == 0,
                        has_dialogue=cts.has_dialogue,
                        target_editorial_duration_s=c.target_editorial_duration_s,
                        camera_complexity=c.camera_movement,
                        narrative_requires_match_cut=c.narrative_requires_match_cut,
                    ),
                    prompt_data=PromptData(
                        shot_type=c.shot_type,
                        camera_movement=c.camera_movement,
                        focal_length="50mm",
                        kinetic_action=c.kinetic_action,
                        lighting=lighting,
                        prompt_skeleton=c.prompt_skeleton,
                    ),
                    spatial_data=SpatialData(),
                    asset_data=AssetData(
                        location_id=location_id,
                        time_of_day=TimeOfDay.INTERIOR,
                        visual_mode=VisualMode.REALITY,
                        characters=characters,
                        props=[],
                        prop_interaction=PropInteraction.NONE,
                    ),
                    audio_data=AudioData(dialogue=[], ambient_sfx="", foley_action=""),
                    blocking_metadata=None,
                )
            )

        plan = EpisodePlan(
            episode_id=ct.episode_id,
            project=self.project,
            total_shots=ct.total_shots,
            shots=shot_records,
            axis_plans=creative.axis_plans,
        )
        # REC-180: materialize per-shot spatial_data from the scene axis plans (replaces the
        # spatial_data=SpatialData() stub above), then warn if the result is spatially monotone.
        from orchestrator.axis_propagation import propagate_axis
        from orchestrator.axis_validation import degenerate_variation
        propagate_axis(plan)
        for w in degenerate_variation(plan):
            logger.warning("  [SPATIAL VARY] %s", w)
        return plan

    @staticmethod
    def _opus_json_only_suffix(object_name: str) -> str:
        """Brutally directive JSON-only contract for Opus OAuth structured calls.

        Opus-via-CLI tends to chat ('Camera-test complete...') instead of
        emitting raw JSON even with a json_schema hint; this forces pure JSON.
        """
        return (
            "\n\nCRITICAL OUTPUT CONTRACT — READ CAREFULLY:\n"
            f"Respond with ONLY a single raw JSON object: the exact {object_name}.\n"
            "Your ENTIRE response MUST begin with the character '{' and end with '}'.\n"
            "Do NOT write ANY preamble, acknowledgment (e.g. 'Camera-test complete', "
            "'Here is...'), explanation, reasoning, commentary, or markdown code fences.\n"
            "No text whatsoever before or after the JSON object. Output the JSON and "
            "nothing else — any other content makes the response invalid and rejected."
        )

    def _storyboard_opus_prompt(self, user_prompt: str) -> str:
        """Append structured-output discipline for Opus OAuth storyboard calls."""
        return f"{user_prompt}{self._opus_json_only_suffix('CreativeEpisodeOutput object')}"

    def _camera_test_opus_prompt(self, user_prompt: str) -> str:
        """Append structured-output discipline for Opus OAuth camera-test calls."""
        return f"{user_prompt}{self._opus_json_only_suffix('CameraTestedEpisode object')}"

    def _validate_camera_test_response(
        self, raw_response: str, episode_num: int
    ) -> CameraTestedEpisode:
        """Parse a camera-test response into a CameraTestedEpisode.

        Strict validation of the raw text is tried first. On failure the
        response is logged (length plus first 400 chars) and two conservative
        recoveries are attempted on the extracted JSON candidate:

        1. validate the extracted text as-is (only when it differs from the
           raw response);
        2. if it decodes to a dict that has ``shots`` but lacks any of
           ``episode_id`` / ``project`` / ``total_shots``, fill those in from
           ``episode_num``, ``self.project`` and ``len(shots)`` and validate.

        Raises:
            Exception: the ORIGINAL validation error whenever no recovery works.
        """
        try:
            return CameraTestedEpisode.model_validate_json(raw_response)
        except Exception as original_error:
            preview = raw_response or ""
            logger.warning(
                "camera-test raw response not pure JSON (len=%d); first 400 chars: %r",
                len(preview), preview[:400],
            )
            candidate = self._extract_first_json_object(raw_response)
            if candidate is None:
                raise original_error

            # Recovery 1: the extracted text may already be a valid episode.
            if candidate != raw_response:
                try:
                    return CameraTestedEpisode.model_validate_json(candidate)
                except Exception:
                    pass

            # Recovery 2: supply envelope fields the model left out.
            try:
                payload = json.loads(candidate)
                envelope_keys = ("episode_id", "project", "total_shots")
                lacks_envelope = (
                    isinstance(payload, dict)
                    and "shots" in payload
                    and not all(key in payload for key in envelope_keys)
                )
                if lacks_envelope:
                    payload.setdefault("episode_id", f"EP{episode_num:03d}")
                    payload.setdefault("project", self.project)
                    payload.setdefault("total_shots", len(payload["shots"]))
                    return CameraTestedEpisode.model_validate(payload)
            except Exception:
                pass
            raise original_error

    def _validate_creative_response(
        self, raw_response: str, episode_num: int
    ) -> CreativeEpisodeOutput:
        """Parse a storyboard response into a CreativeEpisodeOutput.

        Strict validation of the raw text is tried first. On failure two
        conservative recoveries are attempted on the extracted JSON candidate:

        1. validate the extracted text as-is (only when it differs from the
           raw response);
        2. if it decodes to a dict that has ``shots`` but lacks ``episode_id``
           or ``total_shots``, fill those in from ``episode_num`` and
           ``len(shots)`` and validate.

        Raises:
            Exception: the ORIGINAL validation error whenever no recovery works.
        """
        try:
            return CreativeEpisodeOutput.model_validate_json(raw_response)
        except Exception as original_error:
            candidate = self._extract_first_json_object(raw_response)
            if candidate is None:
                raise original_error

            # Recovery 1: the extracted text may already be valid output.
            if candidate != raw_response:
                try:
                    return CreativeEpisodeOutput.model_validate_json(candidate)
                except Exception:
                    pass

            # Recovery 2: supply envelope fields the model left out.
            try:
                payload = json.loads(candidate)
                envelope_keys = ("episode_id", "total_shots")
                lacks_envelope = (
                    isinstance(payload, dict)
                    and "shots" in payload
                    and not all(key in payload for key in envelope_keys)
                )
                if lacks_envelope:
                    payload.setdefault("episode_id", f"EP{episode_num:03d}")
                    payload.setdefault("total_shots", len(payload["shots"]))
                    return CreativeEpisodeOutput.model_validate(payload)
            except Exception:
                pass
            raise original_error

    def _validate_breakdown_response(self, raw_response: str) -> GlobalBible:
        """Parse a breakdown response into a GlobalBible.

        Strict validation of the raw text is tried first. If that fails and a
        JSON candidate distinct from the raw text can be extracted, the
        candidate is validated instead (its own error propagates on failure).
        Otherwise the original validation error is re-raised.
        """
        try:
            return GlobalBible.model_validate_json(raw_response)
        except Exception:
            candidate = self._extract_first_json_object(raw_response)
            if candidate is not None and candidate != raw_response:
                return GlobalBible.model_validate_json(candidate)
            raise

    def _extract_first_json_object(self, text: str) -> str | None:
        """Extract a fenced JSON block or the first balanced JSON object."""
        fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.IGNORECASE | re.DOTALL)
        if fenced:
            return fenced.group(1).strip()

        start = text.find("{")
        if start == -1:
            return None

        depth = 0
        in_string = False
        escaped = False
        for index, char in enumerate(text[start:], start=start):
            if in_string:
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == '"':
                    in_string = False
                continue

            if char == '"':
                in_string = True
            elif char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return text[start:index + 1]

        return text[start:].strip()

    def _retry_prompt(self, original_prompt: str, errors: list[str]) -> str:
        """Build a retry prompt that includes the original + error feedback."""
        error_text = "\n".join(f"- {e}" for e in errors[:20])
        return (
            f"{original_prompt}\n\n"
            f"---\n\n"
            f"YOUR PREVIOUS OUTPUT HAD ERRORS. Fix these issues:\n\n"
            f"{error_text}\n\n"
            f"Output valid JSON that passes all validation checks."
        )

    def _send_to_review_queue(
        self, entity_id: str, stage: str, errors: list[str], raw_response: str
    ) -> None:
        """Save a failed extraction to the review queue for human inspection.

        Appends one entry to ``DATA_DIR / "review_queue.json"`` (a JSON list),
        creating the file — and its parent directory — when missing.

        Args:
            entity_id: Identifier of the entity whose extraction failed.
            stage: Pipeline stage that produced the failure.
            errors: Validation error messages to record.
            raw_response: Raw model output; only the first 5000 characters are
                stored. ``None`` is recorded as an empty string so that a
                missing response cannot prevent the failure from being queued.
        """
        queue_path = DATA_DIR / "review_queue.json"
        queue = []
        if queue_path.exists():
            # Explicit encoding: do not depend on the platform default.
            queue = json.loads(queue_path.read_text(encoding="utf-8"))

        queue.append({
            "entity_id": entity_id,
            "stage": stage,
            "errors": errors,
            "raw_response": (raw_response or "")[:5000],  # Truncate
            # NOTE(review): naive local time, kept for compatibility with
            # existing queue entries — confirm before switching to UTC.
            "timestamp": datetime.now().isoformat(),
        })

        # A missing data directory must not hide the failure being recorded.
        queue_path.parent.mkdir(parents=True, exist_ok=True)
        queue_path.write_text(json.dumps(queue, indent=2), encoding="utf-8")
        logger.error(
            "Sent to review queue: %s (%s) — %d errors", entity_id, stage, len(errors)
        )

    # ------------------------------------------------------------------
    # Stage 2.5: Blocking Pass
    # ------------------------------------------------------------------

    def run_blocking_pass(
        self,
        episode_num: int,
        scene_indices: list[int] | None = None,
    ) -> dict | None:
        """Stage 2.5: produce physical blocking for an episode's shots.

        Delegates to ``BlockingPass``, constructed with this pipeline's
        project, project root and dry-run flag. Per the stage contract it must
        run AFTER run_storyboard_pass() (an existing plan JSON is required);
        it works per-scene, generates Scene Blocking Documents (SBDs), and
        writes blocking_metadata + updated subject_lines back to the plan.

        Args:
            episode_num: Episode number.
            scene_indices: Optional list of specific scenes to (re-)process.

        Returns:
            Dict with stats, or None on dry run.
        """
        from orchestrator.blocking_pass import BlockingPass

        logger.info(f"=== Stage 2.5: Blocking Pass EP{episode_num:03d} ===")

        blocker = BlockingPass(
            project=self.project,
            project_root=self.project_root,
            dry_run=self.dry_run,
        )
        return blocker.run(episode_num=episode_num, scene_indices=scene_indices)

    # ------------------------------------------------------------------
    # Full pipeline
    # ------------------------------------------------------------------

    def run_full(self, episode_nums: list[int] | None = None) -> dict:
        """Run the full pipeline (Stages 0, 1, 1.5 and 2) for the given episodes.

        Stage 0 (camera-test) walks the episodes in slices of
        ``self.batch_size``; a failing episode is logged and left out of the
        results. In dry-run mode the method returns right after Stage 0.
        Stage 1 (breakdown) builds the global bible; if it raises or returns
        None, the partial results are returned. Stage 1.5 (Opus enrichment)
        is skipped when ``self._skip_enrichment`` is truthy; if it raises, a
        warning is logged and the pipeline continues. Stage 2 (storyboard)
        runs only for episodes whose camera-test succeeded.

        Args:
            episode_nums: Episodes to process. Defaults to
                ``self._episode_ids()``.

        Returns:
            Dict with keys ``camera_tested`` (ep_num -> result), ``bible``,
            ``enrichment`` and ``plans`` (ep_num -> plan).
        """
        if episode_nums is None:
            episode_nums = self._episode_ids()

        results = {
            "camera_tested": {},
            "bible": None,
            "enrichment": None,
            "plans": {},
        }

        # Stage 0: Camera-test in batches
        logger.info(f"Starting full pipeline for {len(episode_nums)} episodes (batch_size={self.batch_size})")
        for i in range(0, len(episode_nums), self.batch_size):
            batch = episode_nums[i:i + self.batch_size]
            logger.info(f"Camera-test batch: EP{batch[0]:03d}-EP{batch[-1]:03d}")
            for ep_num in batch:
                try:
                    ct = self.run_camera_test(episode_num=ep_num)
                    if ct is not None:
                        results["camera_tested"][ep_num] = ct
                except Exception as e:
                    # One bad episode must not stop the remaining episodes.
                    logger.error(f"Camera-test failed for EP{ep_num:03d}: {e}")

        if self.dry_run:
            logger.info("[DRY RUN] Full pipeline dry-run complete")
            return results

        # Stage 1: Global bible
        try:
            bible = self.run_breakdown_pass(episode_nums)
            results["bible"] = bible
        except Exception as e:
            # Later stages need the bible, so stop here with partial results.
            logger.error(f"Breakdown pass failed: {e}")
            return results

        if bible is None:
            return results

        # Stage 1.5: Opus Enrichment (fills [OPUS_ENRICHMENT] placeholders)
        if getattr(self, '_skip_enrichment', False):
            logger.info("Opus enrichment skipped (--skip-enrichment flag)")
            results["enrichment"] = {"skipped": True, "reason": "flag"}
        else:
            try:
                enrichment_result = self.run_opus_enrichment()
                results["enrichment"] = enrichment_result
                # Reload only when enrichment reports filled fields, so that
                # Stage 2 sees the enriched bible.
                if enrichment_result.get("filled", 0) > 0:
                    bible = self._load_bible()
            except Exception as e:
                # Enrichment is best-effort: continue with the current bible.
                logger.warning(
                    f"Opus enrichment skipped: {e}. "
                    f"Bible may contain [OPUS_ENRICHMENT] placeholders."
                )
                results["enrichment"] = {"skipped": True, "reason": str(e)}

        # Stage 2: Per-episode plans
        for ep_num in episode_nums:
            # Episodes without a camera-test result have nothing to storyboard.
            if ep_num not in results["camera_tested"]:
                continue
            try:
                plan = self.run_storyboard_pass(ep_num, bible)
                if plan is not None:
                    results["plans"][ep_num] = plan
            except Exception as e:
                logger.error(f"Storyboard pass failed for EP{ep_num:03d}: {e}")

        logger.info(
            f"Pipeline complete: {len(results['camera_tested'])} camera-tested, "
            f"{'1 bible' if results['bible'] else 'no bible'}, "
            f"{len(results['plans'])} plans"
        )
        return results

    # ------------------------------------------------------------------
    # Stage 1.5: Opus Enrichment
    # ------------------------------------------------------------------

    @staticmethod
    def _find_placeholders(obj, path=""):
        """Recursively find all [OPUS_ENRICHMENT] placeholders in a dict.

        Returns list of (dotpath, field_type) tuples.
        E.g., [("characters.SADIE.visual_description", "visual_description"),
               ("locations.int_bar.atmosphere", "atmosphere")]
        """
        results = []
        if isinstance(obj, str) and "[OPUS_ENRICHMENT]" in obj:
            # Derive field_type from the last segment, stripping list indices
            raw_field = path.rsplit(".", 1)[-1] if "." in path else path
            field_type = re.sub(r'\[\d+\]$', '', raw_field)
            results.append((path, field_type))
        elif isinstance(obj, dict):
            for k, v in obj.items():
                child_path = f"{path}.{k}" if path else k
                results.extend(IngestPipeline._find_placeholders(v, child_path))
        elif isinstance(obj, list):
            for i, v in enumerate(obj):
                results.extend(IngestPipeline._find_placeholders(v, f"{path}[{i}]"))
        return results

    @staticmethod
    def _set_by_dotpath(obj, dotpath, value):
        """Set a value in a nested dict using dot notation.

        Handles both dict keys and list indices (e.g., "phases[0].wardrobe").
        """
        parts = re.split(r'\.(?![^\[]*\])', dotpath)
        current = obj
        for i, part in enumerate(parts[:-1]):
            # Handle list index
            m = re.match(r'(.+)\[(\d+)\]$', part)
            if m:
                current = current[m.group(1)][int(m.group(2))]
            else:
                current = current[part]
        # Set the final value
        last = parts[-1]
        m = re.match(r'(.+)\[(\d+)\]$', last)
        if m:
            current[m.group(1)][int(m.group(2))] = value
        else:
            current[last] = value

    @staticmethod
    def _extract_character_dna(characters_md: str, character_name: str) -> str:
        """Extract the section for a specific character from characters.md.

        Handles both prose sections (## SADIE) and markdown tables.
        Splits on any heading level (##, ###) and matches case-insensitively.
        Falls back to table row extraction if headers don't match.
        Returns the full section text, or empty string if not found.
        """
        # 1. Try header splitting (handles ## SADIE, ### SADIE, etc.)
        sections = re.split(r'^#+\s+', characters_md, flags=re.MULTILINE)
        for section in sections:
            first_line = section.strip().split('\n')[0] if section.strip() else ""
            # Match first word exactly to avoid substring false positives (e.g., "LIA" matching "AMELIA")
            first_word = first_line.split()[0].upper() if first_line.strip() else ""
            if character_name.upper() == first_word:
                return section.strip()

        # 2. Fallback: table row extraction (| SADIE | ... |)
        lines = characters_md.split('\n')
        for i, line in enumerate(lines):
            if line.strip().startswith('|') and re.search(rf'\b{re.escape(character_name)}\b', line, re.IGNORECASE):
                header_lines = []
                for j in range(i - 1, -1, -1):
                    if not lines[j].strip().startswith('|'):
                        break
                    header_lines.insert(0, lines[j])
                if len(header_lines) >= 2 and '-' in header_lines[1]:
                    return f"{header_lines[0]}\n{header_lines[1]}\n{line}"
                return line

        return ""

    @staticmethod
    def _extract_world_section(characters_md: str) -> str:
        """Extract 'The World' or 'Visual Grammar' sections from characters.md.

        These sections are relevant for location atmosphere enrichment.
        """
        sections = re.split(r'^## ', characters_md, flags=re.MULTILINE)
        relevant = []
        for section in sections:
            first_line = section.strip().split('\n')[0].lower() if section.strip() else ""
            if any(kw in first_line for kw in ("world", "visual grammar", "influences", "bleed")):
                relevant.append("## " + section.strip())
        return "\n\n".join(relevant)

    def _call_opus(self, system_prompt: str, user_prompt: str) -> dict:
        """Request enrichment text from Opus-OAuth.

        Returns:
            Dict with the stripped response under ``text``; ``cost``,
            ``input_tokens`` and ``output_tokens`` are always reported as zero.
        """
        reply = call_opus_oauth(OPUS_ENRICHMENT_MODEL, system_prompt, user_prompt)
        usage = dict.fromkeys(("input_tokens", "output_tokens"), 0)
        return {"text": reply.strip(), "cost": 0.0, **usage}

    def run_opus_enrichment(self, bible_path: Path | None = None) -> dict:
        """Stage 1.5: Fill [OPUS_ENRICHMENT] placeholders in the global bible via Opus.

        Groups placeholders by entity (character or location) for coherent
        per-entity enrichment. Each entity gets one Opus call with all its
        placeholders, the relevant character DNA, and bible structural data.
        The response is parsed as ``FIELD:`` / ``VALUE:`` pairs. When a call
        fails, every field of that entity is overwritten with an
        ``[OPUS_ENRICHMENT_FAILED: ...]`` marker so the failure is visible.

        The bible is then rewritten atomically (tempfile + ``os.replace``) and
        the project-level bible manifest record is re-stamped.

        Args:
            bible_path: Path to global_bible.json. Defaults to self.bible_path.

        Returns:
            dict with keys: filled (int), skipped (int, always 0 here),
            failed (list of dotpaths), total_cost (float),
            details (list of per-entity results; failed entities carry an
            ``error`` message).

        Raises:
            FileNotFoundError: the bible file does not exist.
            Exception: the atomic write or the manifest stamp failed.
        """
        if bible_path is None:
            bible_path = self.bible_path

        if not bible_path.exists():
            raise FileNotFoundError(f"Bible not found: {bible_path}")

        bible = json.loads(bible_path.read_text(encoding="utf-8"))
        placeholders = self._find_placeholders(bible)

        if not placeholders:
            logger.info("No [OPUS_ENRICHMENT] placeholders found — bible already enriched")
            return {"filled": 0, "skipped": 0, "failed": [], "total_cost": 0.0, "details": []}

        logger.info(f"=== Stage 1.5: Opus Enrichment ({len(placeholders)} placeholders) ===")

        # Load character DNA if available
        dna_path = self.project_root / "bible" / "characters.md"
        characters_md = dna_path.read_text(encoding="utf-8") if dna_path.exists() else ""
        if not characters_md:
            logger.warning("No bible/characters.md found — enrichment will use structural data only")

        # Group placeholders by entity
        # e.g., {"characters.SADIE": [("characters.SADIE.visual_description", "visual_description"), ...]}
        entity_groups: dict[str, list[tuple[str, str]]] = {}
        for dotpath, field_type in placeholders:
            parts = dotpath.split(".")
            if len(parts) >= 2:
                entity_key = f"{parts[0]}.{parts[1]}"  # e.g., "characters.SADIE"
            else:
                entity_key = "_project"  # top-level fields like wardrobe_philosophy
            entity_groups.setdefault(entity_key, []).append((dotpath, field_type))

        results = {"filled": 0, "skipped": 0, "failed": [], "total_cost": 0.0, "details": []}

        def record_failure(dotpath: str) -> None:
            # A field may fail parsing and then be hit again by a call-level
            # failure; list it only once.
            if dotpath not in results["failed"]:
                results["failed"].append(dotpath)

        for entity_key, fields in entity_groups.items():
            entity_type = entity_key.split(".")[0] if "." in entity_key else "project"
            entity_name = entity_key.split(".")[1] if "." in entity_key else entity_key

            logger.info(f"Enriching {entity_key} ({len(fields)} fields)")

            # Build context for this entity
            if entity_type == "characters":
                # Character: extract DNA section + bible structural data
                dna_section = self._extract_character_dna(characters_md, entity_name)
                if not dna_section and characters_md:
                    dna_section = characters_md  # Fallback to full file
                bible_entry = bible.get("characters", {}).get(entity_name, {})
                context_block = f"CHARACTER: {entity_name}\n\n"
                if dna_section:
                    context_block += f"--- CHARACTER DNA (creative intent) ---\n{dna_section}\n\n"
                context_block += f"--- BIBLE STRUCTURAL DATA ---\n{json.dumps(bible_entry, indent=2)}\n"

            elif entity_type == "locations":
                # Location: bible structural data + world section from DNA
                bible_entry = bible.get("locations", {}).get(entity_name, {})
                world_section = self._extract_world_section(characters_md)
                context_block = f"LOCATION: {entity_name}\n\n"
                if world_section:
                    context_block += f"--- WORLD CONTEXT ---\n{world_section}\n\n"
                context_block += f"--- BIBLE STRUCTURAL DATA ---\n{json.dumps(bible_entry, indent=2)}\n"

            else:
                # Project-level (wardrobe_philosophy)
                context_block = f"PROJECT: {self.project}\n\n"
                if characters_md:
                    world_section = self._extract_world_section(characters_md)
                    if world_section:
                        context_block += f"--- WORLD CONTEXT ---\n{world_section}\n\n"
                # Include character roster summary
                char_names = list(bible.get("characters", {}).keys())
                context_block += f"--- CHARACTERS ---\n{', '.join(char_names)}\n"

            # Build per-field prompts within this entity
            field_prompts = []
            for dotpath, field_type in fields:
                # Unknown field types fall back to the "atmosphere" format.
                fmt = FORMAT_INSTRUCTIONS.get(field_type, FORMAT_INSTRUCTIONS.get("atmosphere"))
                field_prompts.append(f"### {dotpath}\nFormat: {fmt}\n")

            # NOTE: "{dotpath}" / "{your content}" below are literal text shown
            # to the model, not format fields — this is deliberately not an f-string.
            system_prompt = (
                "You are a creative director filling in visual production descriptions for a "
                "vertical microdrama series. Each field has specific format requirements. "
                "Output ONLY the requested content for each field, clearly labeled. "
                "Do NOT add commentary, explanations, or preamble.\n\n"
                "For EACH field below, output:\n"
                "FIELD: {dotpath}\n"
                "VALUE: {your content}\n\n"
                "Separate fields with a blank line."
            )

            user_prompt = f"{context_block}\n\n--- FIELDS TO FILL ---\n\n" + "\n".join(field_prompts)

            try:
                result = self._call_opus(system_prompt, user_prompt)
                logger.info(
                    f"  Opus response: {result['input_tokens']} in / {result['output_tokens']} out, "
                    f"${result['cost']:.4f}"
                )

                # Parse response — extract FIELD:/VALUE: pairs
                response_text = result["text"]
                filled_count = 0
                for dotpath, field_type in fields:
                    # Look for FIELD: {dotpath}\nVALUE: {content} (tolerates markdown bolding)
                    pattern = rf"(?i)\**FIELD:\**\s*{re.escape(dotpath)}\s*\n\**VALUE:\**\s*(.*?)(?=\n\**FIELD:|\Z)"
                    match = re.search(pattern, response_text, re.DOTALL)
                    if match:
                        value = match.group(1).strip().strip('`').strip()
                        if value and "[OPUS_ENRICHMENT]" not in value:
                            self._set_by_dotpath(bible, dotpath, value)
                            filled_count += 1
                            logger.info(f"  Filled: {dotpath} ({len(value)} chars)")
                        else:
                            logger.warning(f"  Empty or placeholder value for {dotpath}")
                            record_failure(dotpath)
                    else:
                        logger.warning(f"  Could not parse response for {dotpath}")
                        record_failure(dotpath)

                results["filled"] += filled_count
                results["total_cost"] += result["cost"]
                results["details"].append({
                    "entity": entity_key,
                    "fields_requested": len(fields),
                    "fields_filled": filled_count,
                    "cost": result["cost"],
                })

            except Exception as e:
                # OpusOAuthError and unexpected failures are reported the same
                # way, so every failed entity gets a details entry.
                logger.error(f"  Opus call failed for {entity_key}: {e}")
                for dotpath, _ in fields:
                    # Mark failed placeholders so they're visible
                    self._set_by_dotpath(bible, dotpath, f"[OPUS_ENRICHMENT_FAILED: {e}]")
                    record_failure(dotpath)
                results["details"].append({
                    "entity": entity_key,
                    "fields_requested": len(fields),
                    "fields_filled": 0,
                    "cost": 0.0,
                    "error": str(e),
                })

        # Atomic write — tempfile + os.replace
        fd, tmp = tempfile.mkstemp(dir=str(bible_path.parent), suffix=".json")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(bible, f, indent=2, ensure_ascii=False)
            os.replace(tmp, str(bible_path))
            logger.info(f"Enriched bible written: {bible_path}")
        except Exception:
            # Do not leave a stray temp file next to the bible.
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

        # Re-stamp the PROJECT-LEVEL bible record after enrichment (builder flips
        # to stage1_5.enrich, MAJOR-3). REC-164 Phase 3, best-effort-but-loud.
        try:
            derivation_manifest.stamp_bible(
                self.project,
                content_sha=content_sha(bible),
                builder="stage1_5.enrich",
                built_at=datetime.now(timezone.utc).isoformat(),
            )
        except Exception:
            logger.error(
                "Failed to stamp bible manifest (enrich) for %s", self.project,
                exc_info=True,
            )
            raise

        logger.info(
            f"Enrichment complete: {results['filled']} filled, "
            f"{len(results['failed'])} failed, ${results['total_cost']:.4f}"
        )
        return results


# ---------------------------------------------------------------------------
# Plan Merge (ADR-R08)
# ---------------------------------------------------------------------------


def merge_with_prior_plan(old_plan: dict, new_plan: dict) -> dict:
    """Merge a freshly generated plan with a prior one, preserving human edits.

    Shots are matched by ``source_text_hash`` (NOT ``shot_id``) so that
    re-ordering or re-numbering doesn't lose manual work. If several old
    shots share a hash, the last one in the old plan wins.

    Fields copied from the matching old shot (only when present on it):
      - manual_prompt_override
      - human_approvals
      - review_status
      - selected_take_id

    Matched shots get ``needs_review = False``; shots without a hash or
    without a match in the old plan get ``needs_review = True``.

    ``new_plan`` and its shot dicts are updated in place, and the same
    ``new_plan`` object is returned. Copied values are shared by reference
    with ``old_plan`` (no deep copy is made).

    A missing or null ``"shots"`` entry in either plan, or a falsy
    ``old_plan`` (e.g. ``None`` when there is no prior plan), is treated as
    "no shots" instead of raising.

    Args:
        old_plan: Previously saved plan dict.
        new_plan: Freshly generated plan dict.

    Returns:
        Merged plan dict (the mutated ``new_plan``).
    """
    # Human-authored fields that must survive regeneration. Built once here
    # rather than once per matched shot.
    preserve_fields = (
        "manual_prompt_override",
        "human_approvals",
        "review_status",
        "selected_take_id",
    )

    # Build lookup from old plan keyed by source_text_hash. `or` guards cover
    # both a missing key and an explicit JSON null.
    old_by_hash: dict[str, dict] = {}
    for old_shot in (old_plan or {}).get("shots") or []:
        old_hash = old_shot.get("source_text_hash", "")
        if old_hash:
            old_by_hash[old_hash] = old_shot

    # Merge: a fresh list object holding the (mutated) new shot dicts.
    merged_shots = list(new_plan.get("shots") or [])
    for shot in merged_shots:
        new_hash = shot.get("source_text_hash", "")
        prior = old_by_hash.get(new_hash) if new_hash else None

        if prior:
            # Unchanged shot — preserve human edits
            for field in preserve_fields:
                if field in prior:
                    shot[field] = prior[field]
            shot["needs_review"] = False
        else:
            # New or changed shot — flag for Dailies review
            shot["needs_review"] = True

    new_plan["shots"] = merged_shots

    # Count changes
    changed = sum(1 for s in merged_shots if s.get("needs_review"))
    if changed:
        logger.info(f"Merge: {changed}/{len(merged_shots)} shots flagged as new/changed")

    return new_plan


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: parse arguments and run the requested pipeline stage(s).

    Subcommands:
      - ``camera-test EP [EP ...]``: Stage 0 on each listed episode.
      - ``breakdown``: Stage 1 (Global Bible).
      - ``enrichment``: Stage 1.5 (Opus enrichment on the bible).
      - ``storyboard EP [EP ...]``: Stage 2 on each listed episode, using the
        bible loaded once up front.
      - ``full [--episodes ...] [--skip-enrichment]``: all stages.

    With no subcommand, the help text is printed and nothing else happens
    (the pipeline object is not constructed).

    ``--project-root`` defaults to ``<projects root>/<--project>``, resolved
    after parsing so that both the ``--project`` value and any
    RECOIL_PROJECTS_ROOT env override apply. If the projects root cannot be
    resolved and ``--project-root`` was not given, the parser exits with a
    usage error instead of failing later on ``Path(None)``.
    """
    import argparse

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%H:%M:%S",
    )

    parser = argparse.ArgumentParser(description="Starsend Render Extraction Pipeline")
    parser.add_argument("--project", default="leviathan", help="Project name")
    parser.add_argument(
        "--project-root",
        default=None,
        help=(
            "Path to project data (Recoil project directory; "
            "default: <projects root>/<project>)"
        ),
    )
    parser.add_argument("--dry-run", action="store_true", help="Don't call APIs")
    parser.add_argument(
        "--extraction-model",
        default=DEFAULT_EXTRACTION_MODEL,
        help=f"Extraction model (default: {DEFAULT_EXTRACTION_MODEL})",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=DEFAULT_BATCH_SIZE,
        help=f"Episodes per batch (default: {DEFAULT_BATCH_SIZE})",
    )

    sub = parser.add_subparsers(dest="command")

    # camera-test
    ct_parser = sub.add_parser("camera-test", help="Run Stage 0 on episode(s)")
    ct_parser.add_argument("episodes", nargs="+", type=int, help="Episode numbers")

    # breakdown
    sub.add_parser("breakdown", help="Run Stage 1 (Global Bible)")

    # enrichment
    sub.add_parser("enrichment", help="Run Stage 1.5 (Opus Enrichment on bible)")

    # storyboard
    sb_parser = sub.add_parser("storyboard", help="Run Stage 2 on episode(s)")
    sb_parser.add_argument("episodes", nargs="+", type=int, help="Episode numbers")

    # full
    full_parser = sub.add_parser("full", help="Run all stages")
    full_parser.add_argument("--episodes", nargs="*", type=int, help="Episode numbers (default: all)")
    full_parser.add_argument("--skip-enrichment", action="store_true",
                             help="Skip Opus enrichment (Stage 1.5) in full pipeline")

    args = parser.parse_args()

    # No subcommand: show usage before doing any path resolution or pipeline
    # construction, so `--help`-style invocations never fail on setup.
    if args.command is None:
        parser.print_help()
        return

    # Resolve the default project root from the *selected* project, not a
    # hard-coded one, so `--project foo` without `--project-root` does not
    # silently point at another project's data directory.
    if args.project_root is None:
        from recoil.core.paths import projects_root as _projects_root_fn
        try:
            args.project_root = str(_projects_root_fn() / args.project)
        except Exception as exc:  # paths unresolvable — the flag becomes mandatory
            parser.error(
                f"--project-root is required: could not resolve the default "
                f"projects root ({exc})"
            )

    pipeline = IngestPipeline(
        project=args.project,
        project_root=Path(args.project_root),
        dry_run=args.dry_run,
        extraction_model=args.extraction_model,
        batch_size=args.batch_size,
    )

    if args.command == "camera-test":
        for ep in args.episodes:
            pipeline.run_camera_test(episode_num=ep)

    elif args.command == "breakdown":
        pipeline.run_breakdown_pass()

    elif args.command == "enrichment":
        pipeline.run_opus_enrichment()

    elif args.command == "storyboard":
        # Load the bible once and reuse it for every requested episode.
        bible = pipeline._load_bible()
        for ep in args.episodes:
            pipeline.run_storyboard_pass(ep, bible)

    elif args.command == "full":
        if args.skip_enrichment:
            # Flag set on the pipeline instance to disable enrichment in run_full
            pipeline._skip_enrichment = True
        pipeline.run_full(args.episodes)


if __name__ == "__main__":
    # Script invocation: push the directory two levels above this file to the
    # front of sys.path, then hand off to the CLI.
    _script_root = Path(__file__).parent.parent
    sys.path.insert(0, str(_script_root))
    main()