"""
keyframe_context.py — Smart prompt engineering + extrapolation for keyframe pipeline.

Layer 2: Flash text calls to build NBP-optimized prompts from approved previz,
director edits, and bible context. Layer 3: Extrapolation prompts for first/last
frame generation from locked keyframes.

Cost: ~$0.001 per Flash text call.
"""

import hashlib
import json
import logging
import os
from pathlib import Path
from typing import Optional

from recoil.core.paths import ProjectPaths
from recoil.core.prompt_config import load_prompt_file
from recoil.core.exceptions import KeyframeContextLookupError

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Flat cost estimate (in dollars, per the module docstring) that is reported
# as the "cost" of every successful Flash text-only call.
FLASH_TEXT_COST = 0.001


def _call_flash_text(system_instruction: str, user_parts: list) -> dict:
    """Make a Flash text-only call (no image generation).

    Exceptions raised while building or sending the request (including a
    missing SDK) are caught, logged with their traceback, and reported through
    the returned dict instead of being raised.

    Args:
        system_instruction: System prompt for Flash.
        user_parts: List of content parts — each is either:
            - str (text)
            - tuple (bytes, mime_type, label) for images; a truthy label is
              sent as a text part right after the image.
            Parts of any other shape are skipped with a warning.

    Returns:
        {"success": True, "text": str, "cost": float}
        or {"success": False, "error": str}
    """
    try:
        # Imported inside the try so a missing SDK becomes an error dict.
        from google import genai
        from google.genai import types as genai_types

        api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            return {"success": False, "error": "GEMINI_API_KEY not set"}

        client = genai.Client(api_key=api_key)

        config = genai_types.GenerateContentConfig(
            temperature=0.3,
            systemInstruction=system_instruction,
            responseModalities=["TEXT"],
        )

        # Build multimodal content
        contents = []
        for part in user_parts:
            if isinstance(part, str):
                contents.append(genai_types.Part.from_text(text=part))
            elif isinstance(part, tuple) and len(part) == 3:
                img_bytes, mime_type, label = part
                contents.append(genai_types.Part.from_bytes(data=img_bytes, mime_type=mime_type))
                if label:
                    contents.append(genai_types.Part.from_text(text=label))
            else:
                # Previously dropped silently; make the loss of context visible.
                logger.warning(
                    "Skipping unsupported Flash content part of type %s",
                    type(part).__name__,
                )

        from recoil.core.model_profiles import get_model
        flash_model = get_model("flash", "text")

        response = client.models.generate_content(
            model=flash_model,
            contents=contents,
            config=config,
        )

        # Concatenate every text part of every candidate.
        fragments = []
        if response and response.candidates:
            for candidate in response.candidates:
                content = candidate.content
                if not (content and content.parts):
                    continue
                fragments.extend(
                    piece.text for piece in content.parts if getattr(piece, "text", None)
                )

        # Strip before testing so a whitespace-only reply is reported as a
        # failure rather than as a successful empty prompt.
        text = "".join(fragments).strip()
        if text:
            return {"success": True, "text": text, "cost": FLASH_TEXT_COST}

        return {"success": False, "error": "No text in response"}

    except Exception as e:
        # Best-effort boundary: callers only look at the dict, so keep the
        # traceback in the log where it can still be diagnosed.
        logger.warning("Flash text call failed: %s", e, exc_info=True)
        return {"success": False, "error": str(e)}


def _parse_scene_dna_lines(text: str) -> dict:
    """Pull the LIGHTING / PALETTE / ATMOSPHERE lines out of a Flash reply.

    Labels are matched case-insensitively. Markdown decoration around the
    label or value (bullets, ``**bold**``, headings) is tolerated so replies
    such as ``**LIGHTING:** warm`` or ``- Palette: teal`` still parse. When a
    label occurs more than once the last occurrence wins.
    """
    locks = {"lighting": "", "palette": "", "atmosphere": ""}
    for raw_line in text.split("\n"):
        label, sep, value = raw_line.partition(":")
        if not sep:
            continue
        key = label.strip(" \t\r*#>-•_`").lower()
        if key in locks:
            locks[key] = value.strip().strip("*").strip()
    return locks


def extract_scene_visual_dna(
    keyframe_bytes: bytes,
    shot: dict,
    bible: dict,
    project: str,
) -> dict:
    """Extract visual DNA from a scene's first keyframe for consistency locks.

    Flash analyzes the generated keyframe and extracts lighting, palette, and
    atmosphere as prose descriptions. These become "scene visual locks" and are
    persisted to ``scene_locks.json`` in the project's visual state directory
    under the key ``"{episode}_{scene_index}"`` (the key ``load_scene_locks``
    reads). The episode number is parsed from a shot id of the form
    ``EP<digits>_SH<digits>`` and falls back to 0 when the id does not match.

    Args:
        keyframe_bytes: Raw bytes of the generated keyframe image (sent to
            Flash as ``image/png``).
        shot: Shot plan dict (from ep_NNN_plan.json).
        bible: Global bible dict (currently unused by this function).
        project: Project name.

    Returns:
        {"success": True, "locks": {"lighting": str, "palette": str, "atmosphere": str}, "cost": float}
        or {"success": False, "error": str}
    """
    import re  # local import: `re` is not among this module's top-level imports

    system_instruction = load_prompt_file("flash_scene_dna_v1.0.txt")
    if not system_instruction:
        return {"success": False, "error": "flash_scene_dna_v1.0.txt not found"}

    shot_id = shot.get("shot_id", "UNKNOWN")
    # `or {}` also covers an explicit `"asset_data": null` in the plan.
    location_id = (shot.get("asset_data") or {}).get("location_id", "")

    user_parts = [
        (keyframe_bytes, "image/png", f"Keyframe from {shot_id} — analyze this image"),
    ]
    if location_id:
        user_parts.append(f"This shot is set in location: {location_id}")

    result = _call_flash_text(system_instruction, user_parts)
    if not result.get("success"):
        return {"success": False, "error": result.get("error", "Flash call failed")}

    # Parse the three labeled lines
    locks = _parse_scene_dna_lines(result["text"])
    if not any(locks.values()):
        logger.warning(
            "Scene DNA reply for %s contained no LIGHTING/PALETTE/ATMOSPHERE lines",
            shot_id,
        )

    # Save to scene_locks.json
    ep_match = re.match(r"EP(\d+)_SH(\d+)", shot_id)
    episode = int(ep_match.group(1)) if ep_match else 0
    scene_index = shot.get("scene_index", 0)
    lock_key = f"{episode}_{scene_index}"

    state_dir = ProjectPaths.for_project(project).visual_state_dir
    state_dir.mkdir(parents=True, exist_ok=True)
    locks_path = state_dir / "scene_locks.json"

    all_locks = {}
    if locks_path.exists():
        try:
            loaded = json.loads(locks_path.read_text(encoding="utf-8"))
        except (ValueError, OSError) as exc:
            # ValueError covers malformed JSON and invalid UTF-8. The file is
            # about to be replaced, so at least leave a trace of that.
            logger.warning(
                "Ignoring unreadable scene locks file %s (it will be rewritten): %s",
                locks_path, exc,
            )
        else:
            if isinstance(loaded, dict):
                all_locks = loaded
            else:
                logger.warning(
                    "Ignoring scene locks file %s: expected a JSON object, got %s",
                    locks_path, type(loaded).__name__,
                )

    all_locks[lock_key] = locks
    locks_path.write_text(json.dumps(all_locks, indent=2), encoding="utf-8")

    return {"success": True, "locks": locks, "cost": result["cost"]}


def load_scene_locks(episode: int, scene_index: int, project: str) -> dict | None:
    """Load scene visual locks for a given scene.

    Reads ``scene_locks.json`` from the project's visual state directory and
    looks up the entry stored under ``"{episode}_{scene_index}"``.

    Args:
        episode: Episode number.
        scene_index: Scene index within the episode.
        project: Project name.

    Returns:
        The stored entry (a dict with "lighting", "palette", "atmosphere" keys
        when written by ``extract_scene_visual_dna``), or None if the file is
        missing, unreadable, not a JSON object, or has no entry for the scene.
    """
    lock_key = f"{episode}_{scene_index}"
    locks_path = ProjectPaths.for_project(project).visual_state_dir / "scene_locks.json"
    try:
        all_locks = json.loads(locks_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No locks written yet — the normal "not found" case.
        return None
    except (ValueError, OSError) as exc:
        # ValueError covers malformed JSON as well as invalid UTF-8.
        logger.warning("Could not read scene locks file %s: %s", locks_path, exc)
        return None

    if not isinstance(all_locks, dict):
        logger.warning(
            "Scene locks file %s is not a JSON object (got %s)",
            locks_path, type(all_locks).__name__,
        )
        return None
    return all_locks.get(lock_key)


def describe_moodboard(
    moodboard_bytes: bytes,
    location_id: str,
    mime_type: str,
    project: str,
) -> dict:
    """Convert a location moodboard image to text description via Flash.

    Caches results by SHA-256 of image bytes (in ``moodboard_text_cache.json``
    under the project's visual state directory) to avoid redundant API calls.
    The cache is best-effort: an unreadable cache file or a malformed entry is
    treated as a miss, and a failed cache write is logged without discarding
    the description that was just generated.

    Args:
        moodboard_bytes: Raw bytes of the moodboard image.
        location_id: Location identifier; used in the image label sent to
            Flash and stored alongside the cached text.
        mime_type: MIME type of the image.
        project: Project name.

    Returns:
        {"success": True, "text": str, "cost": float} (cost is 0.0 on a cache hit)
        or {"success": False, "error": str}
    """
    # Check cache first
    img_hash = hashlib.sha256(moodboard_bytes).hexdigest()
    state_dir = ProjectPaths.for_project(project).visual_state_dir
    state_dir.mkdir(parents=True, exist_ok=True)
    cache_path = state_dir / "moodboard_text_cache.json"

    cache = {}
    if cache_path.exists():
        try:
            loaded = json.loads(cache_path.read_text(encoding="utf-8"))
        except (ValueError, OSError) as exc:
            # ValueError covers malformed JSON and invalid UTF-8.
            logger.warning("Ignoring unreadable moodboard cache %s: %s", cache_path, exc)
        else:
            if isinstance(loaded, dict):
                cache = loaded
            else:
                logger.warning(
                    "Ignoring moodboard cache %s: expected a JSON object, got %s",
                    cache_path, type(loaded).__name__,
                )

    # Only a well-formed entry counts as a hit; anything else is regenerated.
    entry = cache.get(img_hash)
    if isinstance(entry, dict) and isinstance(entry.get("text"), str):
        return {"success": True, "text": entry["text"], "cost": 0.0}

    # Cache miss — call Flash
    system_instruction = load_prompt_file("flash_moodboard_to_text_v1.0.txt")
    if not system_instruction:
        return {"success": False, "error": "flash_moodboard_to_text_v1.0.txt not found"}

    user_parts = [
        (moodboard_bytes, mime_type, f"Location moodboard: {location_id}"),
    ]

    result = _call_flash_text(system_instruction, user_parts)
    if not result.get("success"):
        return {"success": False, "error": result.get("error", "Flash call failed")}

    text = result["text"].strip()

    # Save to cache
    cache[img_hash] = {"text": text, "location_id": location_id}
    try:
        cache_path.write_text(json.dumps(cache, indent=2), encoding="utf-8")
    except OSError as exc:
        # The Flash call already succeeded; keep its result.
        logger.warning("Could not write moodboard cache %s: %s", cache_path, exc)

    return {"success": True, "text": text, "cost": result["cost"]}


def build_smart_prompt(
    shot: dict,
    all_shots: list[dict],
    bible: dict,
    episode: int,
    project: str,
    previz_image_path: Path | None = None,
    director_edit: str | None = None,
) -> dict:
    """Flash text call to build/refine NBP-optimized keyframe prompt.

    Sends Flash the visual bible, any scene visual locks attached to the shot
    (``shot["_scene_visual_locks"]``), the shot sequence and adjacent-shot
    context, the approved previz frame (composition reference), the shot
    specification and the pre-compiled ``keyframe_nbp`` prompt from the plan.
    Flash rewrites the prompt for NBP-optimal generation.

    When the ``critic_flags.ip3_keyframe_rewrite`` config flag is set, the
    rewritten prompt is additionally run through ``KeyframeRewriteCritic``.
    Critic failures are logged and never fail the call.

    Args:
        shot: Shot plan dict (from ep_NNN_plan.json).
        all_shots: All shots in the episode (for continuity context). An empty
            or missing list simply omits the sequence/adjacent context.
        bible: Global bible dict.
        episode: Episode number.
        project: Project name.
        previz_image_path: Path to the approved previz frame. Skipped when it
            is None, not a file, or cannot be read.
        director_edit: Optional director modifications to incorporate.

    Returns:
        {"prompt": str, "flash_reasoning": str, "cost": float}, plus a
        "critic_result" dict when the IP3 critic ran and produced one,
        or {"error": str} on failure.
    """
    from recoil.pipeline._lib.previz_context import format_bible_context, format_shot_sequence

    # The sequence text below already tolerated a missing shot list; make the
    # adjacent-shot lookup tolerate it as well instead of raising TypeError.
    all_shots = all_shots or []

    shot_id = shot.get("shot_id", "UNKNOWN")
    prompt_data = shot.get("prompt_data", {})
    asset_data = shot.get("asset_data", {})
    skeleton = prompt_data.get("prompt_skeleton", {})

    # Get pre-compiled keyframe prompt from plan (Stage 2 output)
    pre_compiled = prompt_data.get("keyframe_nbp", "")
    if not pre_compiled:
        # Fall back to building from prompt_skeleton
        pre_compiled = ", ".join(v for v in skeleton.values() if v)

    # Bible context (characters + locations relevant to this shot)
    bible_text = format_bible_context(bible, episode=episode) if bible else ""

    # Shot sequence for continuity
    sequence_text = format_shot_sequence(all_shots, shot_id) if all_shots else ""

    # Adjacent shot context (-1 when this shot is not part of all_shots)
    shot_idx = next((i for i, s in enumerate(all_shots) if s.get("shot_id") == shot_id), -1)
    prev_shot_desc = ""
    next_shot_desc = ""
    if shot_idx > 0:
        prev = all_shots[shot_idx - 1]
        prev_skel = prev.get("prompt_data", {}).get("prompt_skeleton", {})
        prev_shot_desc = f"PREVIOUS SHOT ({prev.get('shot_id', '?')}): {prev_skel.get('subject_line', '')}"
    if 0 <= shot_idx < len(all_shots) - 1:
        nxt = all_shots[shot_idx + 1]
        nxt_skel = nxt.get("prompt_data", {}).get("prompt_skeleton", {})
        next_shot_desc = f"NEXT SHOT ({nxt.get('shot_id', '?')}): {nxt_skel.get('subject_line', '')}"

    # Load externalized system prompt (versioned file)
    system_instruction = load_prompt_file("flash_to_nbp_v1.0.txt")
    if not system_instruction:
        return {"error": "flash_to_nbp_v1.0.txt not found"}

    if director_edit:
        system_instruction += """

DIRECTOR EDIT MODE: The director has modified the prompt. Incorporate their changes \
while preserving visual continuity with the approved previz composition. The director's \
intent takes priority. Still output the strict 5-bracket format."""

    # Build user parts
    user_parts = []

    # Bible context
    if bible_text:
        user_parts.append(f"# VISUAL BIBLE\n\n{bible_text}")

    # Scene visual locks (from first keyframe DNA extraction)
    scene_locks = shot.get("_scene_visual_locks")
    if scene_locks:
        lock_labels = (
            ("lighting", "Scene lighting lock"),
            ("palette", "Scene palette"),
            ("atmosphere", "Scene atmosphere"),
        )
        lock_parts = [
            f"{label}: {scene_locks[key]}"
            for key, label in lock_labels
            if scene_locks.get(key)
        ]
        if lock_parts:
            user_parts.append("# SCENE VISUAL LOCKS (maintain consistency)\n" + "\n".join(lock_parts))

    # Shot sequence
    if sequence_text:
        user_parts.append(sequence_text)

    # Adjacent shot context
    if prev_shot_desc:
        user_parts.append(prev_shot_desc)
    if next_shot_desc:
        user_parts.append(next_shot_desc)

    # Approved previz image (optional: a read failure only costs the reference)
    if previz_image_path and previz_image_path.is_file():
        try:
            img_bytes = previz_image_path.read_bytes()
            mime = "image/png" if previz_image_path.suffix.lower() == ".png" else "image/jpeg"
            user_parts.append((img_bytes, mime, "APPROVED PREVIZ — match this composition"))
        except IOError as e:
            logger.warning("Could not read previz image %s: %s", previz_image_path, e)

    # Shot plan data. Characters may be dicts (use their char_id) or plain ids.
    character_ids = ', '.join(
        c.get('char_id', str(c)) if isinstance(c, dict) else str(c)
        for c in asset_data.get('characters', [])
    )
    shot_spec = f"""# SHOT SPECIFICATION
Shot ID: {shot_id}
Shot type: {prompt_data.get('shot_type', 'MS')}
Focal length: {prompt_data.get('focal_length', '')}
Camera movement: {prompt_data.get('camera_movement', 'static')}
Location: {asset_data.get('location_id', '')}
Characters: {character_ids}
"""

    if skeleton.get("subject_line"):
        shot_spec += f"Action: {skeleton['subject_line']}\n"
    if skeleton.get("emotion_line"):
        shot_spec += f"Emotion: {skeleton['emotion_line']}\n"
    if skeleton.get("environment_line"):
        shot_spec += f"Environment: {skeleton['environment_line']}\n"

    user_parts.append(shot_spec)

    # Pre-compiled prompt
    user_parts.append(f"# PRE-COMPILED KEYFRAME PROMPT (from plan)\n{pre_compiled}")

    # Director edit
    if director_edit:
        user_parts.append(f"# DIRECTOR'S MODIFICATIONS\n{director_edit}")

    # Make the call
    result = _call_flash_text(system_instruction, user_parts)

    if not result.get("success"):
        return {"error": result.get("error", "Flash text call failed")}

    # Parse response: split on ---REASONING---
    full_text = result["text"]
    if "---REASONING---" in full_text:
        parts = full_text.split("---REASONING---", 1)
        prompt = parts[0].strip()
        reasoning = parts[1].strip()
    else:
        prompt = full_text.strip()
        reasoning = ""

    # ── IP3: Keyframe Rewrite Critic ──────────────────────────────
    critic_result_dict = None
    try:
        from recoil.core.paths import get_config
        _cfg = get_config()
        if _cfg.get("critic_flags", {}).get("ip3_keyframe_rewrite", False):
            from recoil.pipeline._lib.critics.keyframe_rewrite_critic import KeyframeRewriteCritic
            _pool_dir = ProjectPaths.for_project(project).visual_state_dir
            _critic = KeyframeRewriteCritic(
                bible=bible,
                shot=shot,
                max_attempts=2,
                experience_pool_dir=_pool_dir,
                shot_id=shot.get("shot_id", ""),
            )
            prompt, _cr = _critic.run(prompt)
            critic_result_dict = _cr.to_dict()
            if not _cr.passed:
                logger.warning("IP3 critic: %s — %s", shot.get("shot_id", "?"),
                    [d.name for d in _cr.hard_failures])
    except ImportError as _e:
        # Critic (or config) module not installed: an expected optional path.
        logger.debug("IP3 critic unavailable, passing through: %s", _e)
    except Exception as _e:
        # The critic is present but broke. Still pass through, but do not
        # hide a real failure at debug level.
        logger.warning(
            "IP3 critic failed for %s, passing through: %s",
            shot.get("shot_id", "?"), _e, exc_info=True,
        )
    # ── End IP3 ───────────────────────────────────────────────────

    out = {
        "prompt": prompt,
        "flash_reasoning": reasoning,
        "cost": result["cost"],
    }
    if critic_result_dict:
        out["critic_result"] = critic_result_dict
    return out


def build_extrapolation_prompt(
    shot: dict,
    all_shots: list[dict],
    bible: dict,
    anchor_role: str,  # "first_frame", "last_frame", or "hero_frame"
    keyframe_image_path: Path,
    episode: int,
    project: str,
    target_frame: str | None = None,  # Required for hero_frame: "first_frame" or "last_frame"
) -> dict:
    """Build NBP prompt for the extrapolated frame (the other end of the shot).

    Flash text call that reads the locked keyframe, shot action/movement/dialogue,
    and adjacent shot context to generate what the scene looks like at the
    other end of the shot duration.

    Anchor handling:
        - "first_frame": the LAST frame is extrapolated. When a next shot
          exists, Flash is told to begin the transition toward its action.
        - "last_frame" (and any other value that is not listed here): the
          FIRST frame is extrapolated.
        - "hero_frame": the keyframe is the peak moment; ``target_frame``
          selects whether the anticipation ("first_frame") or the aftermath
          ("last_frame") frame is generated.

    Args:
        shot: Shot plan dict.
        all_shots: All shots in episode. An empty or missing list simply
            omits the adjacent-shot context.
        bible: Global bible dict.
        anchor_role: "first_frame", "last_frame", or "hero_frame".
        keyframe_image_path: Path to the locked keyframe.
        episode: Episode number.
        project: Project name.
        target_frame: Frame to generate for a "hero_frame" anchor; must be
            "first_frame" or "last_frame". Ignored for other anchors.

    Returns:
        {"prompt": str, "flash_reasoning": str, "cost": float}
        or {"error": str} on failure.

    Raises:
        KeyframeContextLookupError: propagated from ``_find_keyframe_prompt``
            when the execution store cannot be read.
    """
    from recoil.pipeline._lib.previz_context import format_bible_context

    # Tolerate a missing shot list instead of raising TypeError in enumerate().
    all_shots = all_shots or []

    shot_id = shot.get("shot_id", "UNKNOWN")
    prompt_data = shot.get("prompt_data", {})
    asset_data = shot.get("asset_data", {})
    skeleton = prompt_data.get("prompt_skeleton", {})

    # Determine which frame we're generating
    if anchor_role == "hero_frame":
        if not target_frame or target_frame not in ("first_frame", "last_frame"):
            return {"error": "target_frame required for hero_frame anchor (must be 'first_frame' or 'last_frame')"}
        extrapolating = target_frame
    else:
        extrapolating = "last_frame" if anchor_role == "first_frame" else "first_frame"

    # Find adjacent shots for match action context (-1 when shot is not listed)
    shot_idx = next((i for i, s in enumerate(all_shots) if s.get("shot_id") == shot_id), -1)
    has_prev = shot_idx > 0
    has_next = 0 <= shot_idx < len(all_shots) - 1

    # Wording shared by the prompt fragments below.
    target_label = "END" if extrapolating == "last_frame" else "START"
    time_word = "earlier" if extrapolating == "first_frame" else "later"
    # The hero keyframe is neither end of the shot; calling it the locked
    # "END" frame contradicted the rest of the prompt and the image label.
    if anchor_role == "first_frame":
        anchor_label = "START"
    elif anchor_role == "hero_frame":
        anchor_label = "HERO"
    else:
        anchor_label = "END"

    # Build system instruction — ENVIRONMENT LOCK + POSE CHANGE
    # The keyframe ref image anchors the background. The prompt must keep
    # [Environment] and [Lighting] tags identical so NBP doesn't regenerate
    # the background. All visual change should be in body pose, expression,
    # and physical position within the frame.
    system_instruction = f"""You are a prompt engineer for NBP (Gemini 3 Pro Image Preview). \
You are generating the {target_label} frame of a shot, \
given the locked {anchor_label} frame as reference.

ENVIRONMENT LOCK — CRITICAL:
You will receive the ORIGINAL prompt that generated the anchor keyframe.
- COPY [Environment] and [Lighting] tags VERBATIM — character-for-character, no rewording.
  If these change AT ALL, NBP generates a different background and the video model will
  morph between two different rooms. This is the #1 failure mode.
- COPY [Camera] tag verbatim unless the shot has camera movement.
- REWRITE [Subject] and [Action/Pose] to show a CLEARLY DIFFERENT body position.

Think of this as the same security camera footage, 5 seconds {time_word}. \
The room is identical. The person is in a visibly different position."""

    if anchor_role == "first_frame" and has_next:
        system_instruction += """

MATCH ACTION: The extrapolated LAST frame should begin the physical transition toward \
the next shot's action. Show the character's body already moving toward the next beat."""

    # ── Hero frame anchor: bidirectional extrapolation with kinetic overrides ──
    if anchor_role == "hero_frame":
        camera_move = prompt_data.get("camera_movement", "static")
        is_static = camera_move in ("static", "", None)

        if is_static:
            spatial_rule = "Subject stays in roughly the same area of frame."
        else:
            spatial_rule = f"Camera is {camera_move} — adjust subject position to reflect where the camera would be 5 seconds {time_word}."

        if extrapolating == "first_frame":
            system_instruction += f"""

ANTICIPATION FRAME — 5 SECONDS BEFORE THE PEAK:
The reference image shows the DECISIVE MOMENT. You are generating what the camera
saw 5 seconds earlier. The character has NOT yet committed to the action.

Describe a PHYSICALLY DIFFERENT body position — not a subtle tweak:
- If the hero shows someone REACHING for something → anticipation shows them APPROACHING, arms at sides, eyes scanning
- If the hero shows someone REACTING in shock → anticipation shows them UNAWARE, relaxed posture, looking elsewhere
- If the hero shows someone MID-STRIDE → anticipation shows them STANDING STILL or just beginning to shift weight
- If the hero shows someone CROUCHING over something → anticipation shows them STANDING UPRIGHT nearby
- The character's wardrobe and hair are identical (same outfit, same style)
- Expression is NEUTRAL or mildly curious — no intensity yet
- {spatial_rule}

The pose change must be OBVIOUS in a thumbnail. If the difference is only in finger position
or a slight head tilt, you haven't changed enough. Think: "What was this person doing
5 seconds before this photo was taken?" and describe THAT body position."""

        else:  # extrapolating == "last_frame"
            system_instruction += f"""

AFTERMATH FRAME — 5 SECONDS AFTER THE PEAK:
The reference image shows the DECISIVE MOMENT. You are generating what the camera
sees 5 seconds later. The action has COMPLETED and the character is in a new state.

Describe a PHYSICALLY DIFFERENT body position — not a subtle tweak:
- If the hero shows someone REACHING → aftermath shows them HOLDING the object, examining it, or pulling back
- If the hero shows someone REACTING in shock → aftermath shows them PROCESSING: hand on face, turned away, stepping back
- If the hero shows someone MID-STRIDE → aftermath shows them ARRIVED at the destination, stopped, looking around
- If the hero shows someone in CONFRONTATION → aftermath shows them WITHDRAWN, turned partially away, or slumped
- The character's wardrobe and hair are identical (same outfit, same style)
- Expression shows CONSEQUENCE: relief, exhaustion, determination, grief — the emotional residue
- {spatial_rule}

The pose change must be OBVIOUS in a thumbnail. If the difference is only in expression
or a slight lean, you haven't changed enough. Think: "What was this person doing
5 seconds after this photo was taken?" and describe THAT body position."""

    system_instruction += """

OUTPUT FORMAT:
First, write the NBP prompt. Copy [Environment] and [Lighting] verbatim from the original.
Rewrite [Subject] and [Action/Pose] to show the new body position.
Then write "---REASONING---" on its own line.
Then explain: what physical position changed and why (2-3 sentences)."""

    # Build user parts
    user_parts = []

    # Bible context (brief)
    bible_text = format_bible_context(bible, episode=episode) if bible else ""
    if bible_text:
        user_parts.append(f"# VISUAL BIBLE\n\n{bible_text}")

    # Locked keyframe image
    if keyframe_image_path.is_file():
        try:
            img_bytes = keyframe_image_path.read_bytes()
            mime = "image/png" if keyframe_image_path.suffix.lower() == ".png" else "image/jpeg"
            if anchor_role == "hero_frame":
                label = "LOCKED KEYFRAME — this is the HERO frame (decisive moment / peak action)"
            else:
                label = f"LOCKED KEYFRAME — this is the {'FIRST' if anchor_role == 'first_frame' else 'LAST'} frame"
            user_parts.append((img_bytes, mime, label))
        except IOError as e:
            logger.warning("Could not read keyframe %s: %s", keyframe_image_path, e)
            return {"error": f"Could not read keyframe: {e}"}
    else:
        # The call still goes ahead (as before), but Flash gets no image to
        # anchor on — make that visible instead of skipping silently.
        logger.warning(
            "Locked keyframe %s not found; extrapolating without an image reference",
            keyframe_image_path,
        )

    # Shot plan data
    shot_spec = f"""# SHOT SPECIFICATION
Shot ID: {shot_id}
Shot type: {prompt_data.get('shot_type', 'MS')}
Camera movement: {prompt_data.get('camera_movement', 'static')}
Location: {asset_data.get('location_id', '')}
Action: {skeleton.get('subject_line', '')}
Emotion: {skeleton.get('emotion_line', '')}
"""
    dialogue = shot.get("dialogue", "") or prompt_data.get("dialogue", "")
    if dialogue:
        shot_spec += f'Dialogue: "{dialogue}"\n'

    user_parts.append(shot_spec)

    # Adjacent shot context for match actions
    if anchor_role == "first_frame" and has_next:
        nxt = all_shots[shot_idx + 1]
        nxt_skel = nxt.get("prompt_data", {}).get("prompt_skeleton", {})
        user_parts.append(f"# NEXT SHOT ({nxt.get('shot_id', '?')})\nAction: {nxt_skel.get('subject_line', '')}")
    elif anchor_role == "last_frame" and has_prev:
        prev = all_shots[shot_idx - 1]
        prev_skel = prev.get("prompt_data", {}).get("prompt_skeleton", {})
        user_parts.append(f"# PREVIOUS SHOT ({prev.get('shot_id', '?')})\nAction: {prev_skel.get('subject_line', '')}")

    # Match action context for hero_frame extrapolation
    # Use action_line (character motion only) instead of subject_line
    # to avoid injecting narrative elements from adjacent shots
    # (e.g. "dead body" from the next shot leaking into this shot's aftermath)
    if anchor_role == "hero_frame":
        if extrapolating == "last_frame" and has_next:
            # Aftermath → add next-shot motion context for match action
            nxt = all_shots[shot_idx + 1]
            nxt_skel = nxt.get("prompt_data", {}).get("prompt_skeleton", {})
            nxt_action = nxt_skel.get("action_line", "") or nxt_skel.get("emotion_line", "")
            if nxt_action:
                user_parts.append(f"# NEXT SHOT ({nxt.get('shot_id', '?')})\nCharacter motion: {nxt_action}")
        elif extrapolating == "first_frame" and has_prev:
            # Anticipation → add previous-shot motion context for match action
            prev = all_shots[shot_idx - 1]
            prev_skel = prev.get("prompt_data", {}).get("prompt_skeleton", {})
            prev_action = prev_skel.get("action_line", "") or prev_skel.get("emotion_line", "")
            if prev_action:
                user_parts.append(f"# PREVIOUS SHOT ({prev.get('shot_id', '?')})\nCharacter motion: {prev_action}")

    # Include the original keyframe prompt for delta prompting
    # Try to find it from the execution store (the prompt used to generate the keyframe)
    original_prompt = _find_keyframe_prompt(shot.get("shot_id", ""), project)
    if original_prompt:
        user_parts.append(f"# ORIGINAL KEYFRAME PROMPT (copy this, change ONLY [Action/Pose])\n{original_prompt}")

    if anchor_role == "hero_frame":
        direction = "ANTICIPATION (before the peak)" if extrapolating == "first_frame" else "AFTERMATH (after the peak)"
        user_parts.append(f"Generate the {direction} frame prompt now. Remember: minimal delta, strip kinetic energy from the hero reference.")
    else:
        user_parts.append(f"Generate the {'LAST' if extrapolating == 'last_frame' else 'FIRST'} frame prompt now. Remember: minimal delta only.")

    # Make the call
    result = _call_flash_text(system_instruction, user_parts)

    if not result.get("success"):
        return {"error": result.get("error", "Flash text call failed")}

    # Parse response: prompt first, optional reasoning after the marker
    full_text = result["text"]
    if "---REASONING---" in full_text:
        parts = full_text.split("---REASONING---", 1)
        prompt = parts[0].strip()
        reasoning = parts[1].strip()
    else:
        prompt = full_text.strip()
        reasoning = ""

    return {
        "prompt": prompt,
        "flash_reasoning": reasoning,
        "cost": result["cost"],
    }


def _find_keyframe_prompt(shot_id: str, project: str) -> str | None:
    """Return the prompt recorded for the shot's most recent keyframe take.

    Reads ``execution_state.json`` from the project's visual state directory
    and picks the last take in the shot's ``takes`` list whose ``layer`` is
    ``"keyframe"``.

    Returns:
        That take's ``prompt`` value, or None when the data is simply absent:
        no execution state file, no (truthy) record for the shot, no keyframe
        take, or a KeyError/AttributeError raised during the lookup.

    Raises:
        KeyframeContextLookupError: for any other failure (for example
            malformed JSON or an OSError while reading). The failure is
            logged with its traceback and chained as the cause — per
            Tenet 6 these indicate a real bug, not "no data."
    """
    try:
        exec_path = (
            ProjectPaths.for_project(project).visual_state_dir / "execution_state.json"
        )
        if not exec_path.exists():
            return None

        payload = json.loads(exec_path.read_text(encoding="utf-8"))
        record = payload.get("shots", {}).get(shot_id)
        if not record:
            return None

        # Walk every take and remember the last keyframe one seen.
        latest_keyframe_take = None
        for take in record.get("takes", []):
            if take.get("layer") == "keyframe":
                latest_keyframe_take = take

        if latest_keyframe_take is None:
            return None
        return latest_keyframe_take.get("prompt")
    except (KeyError, AttributeError):
        # Treated as genuinely missing data rather than as a failure.
        return None
    except Exception as e:
        logger.exception(
            "_find_keyframe_prompt: unexpected failure for shot_id=%s project=%s",
            shot_id, project,
        )
        raise KeyframeContextLookupError(
            "keyframe_prompt", shot_id, message=str(e)
        ) from e


def build_extrapolation_refs(
    start_keyframe_bytes: bytes,
    character_refs: list[tuple[bytes, str, str]],
    location_refs: list[tuple[bytes, str]],
    prop_refs: list[tuple[bytes, str, str]] | None = None,
) -> list[tuple[bytes, str, str]]:
    """Build ref array for end frame generation (Option C).

    The start keyframe is placed LAST for recency bias — it becomes
    the highest-priority reference, anchoring geometry, lighting,
    and wardrobe to the start frame.

    Budget: 5 refs max (1 location + 1 prop + up to 3 character + start keyframe LAST).
    Character refs are taken from the END of the list (hero identity refs are last
    in resolve_all_character_refs output, so [-3:] preserves them).

    Args:
        start_keyframe_bytes: The locked start keyframe image bytes.
        character_refs: List of (bytes, mime_type, label) character refs.
            Ordered by resolve_all_character_refs: turnarounds first, hero last.
        location_refs: List of (bytes, mime_type) location refs.
        prop_refs: Optional list of (bytes, mime_type, label) prop refs.

    Returns:
        List of (bytes, mime_type, label) tuples for NBP.
    """
    refs: list[tuple[bytes, str, str]] = []

    # NOTE: Location moodboard intentionally EXCLUDED from extraction refs.
    # The hero keyframe already contains the correct environment. Including
    # the moodboard (a different angle/lighting of the same location) causes
    # NBP to average the two, producing background drift between hero and
    # extracted frames. The keyframe anchor is the sole environment reference.

    # 1. Keyframe FIRST (lowest recency = environment anchor, NOT pose anchor)
    #    Placed first so character identity refs have MORE weight than the
    #    keyframe's pose. NBP was reproducing the keyframe pose too faithfully
    #    when it was last — moving it first lets the prompt drive the pose
    #    while the keyframe still grounds the background.
    refs.append((start_keyframe_bytes, "image/png",
        "ENVIRONMENT REFERENCE — match this background, lighting, walls, floor, "
        "and set dressing exactly. The character's pose WILL BE DIFFERENT in this frame."))

    # 2. Prop ref (1 max — if present in shot)
    if prop_refs:
        refs.append(prop_refs[0])

    # 3. Character identity refs LAST (highest recency = strongest face influence)
    #    Take from the END — resolve_all_character_refs() puts hero identity
    #    refs LAST, so [-3:] preserves them.
    if character_refs:
        refs.extend(character_refs[-3:])

    # Enforce 5-ref cap
    if len(refs) > 5:
        # Drop from the front (location/prop first) to preserve identity + start frame
        refs = refs[len(refs) - 5:]

    return refs
