# recoil/pipeline/_lib/plan_loader.py
"""Plan loader + canonical normalizer.

ONE canonical in-memory representation for plans and plan-shots. Today
the codebase ships two file shapes (flat `shots` list and legacy
`sequences` dict). Every consumer downstream of this module reads the
canonical shape — they never branch on raw file shape. This is the
Law 1 (SSOT) foundation for run_overnight.

Schema notes:
  * `previs_model` carries the legacy `model` field forward (the
    still-pass model id, e.g. `gemini-3-pro-image-preview`).
  * `video_model` is NEW (Phase 0). Absent in legacy plans → None.
    Callers MUST provide a video model via `--model` CLI override or
    `build_dispatch_payload(model_override=...)`; otherwise the builder
    falls through to `dispatch_payload.DEFAULT_VIDEO_MODEL`.

Consumers:
  * `recoil/pipeline/_lib/dispatch_payload.py` — Phase 1; reads
    `CanonicalShot.previs_model`/`video_model` and the rest of the
    typed surface.
  * `recoil/pipeline/_lib/scene_clusterer.py` — Phase 4; reads
    `CanonicalShot` only.
  * `recoil/pipeline/orchestrator/episode_runner.py` — Phase 4; walks
    `CanonicalPlan.shots`.
  * `recoil/pipeline/cli/run_overnight.py` — Phase 4; calls
    `load_plan(path)` once at startup.
"""

from __future__ import annotations

import json
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional

logger = logging.getLogger(__name__)


# ── Typed plan-schema entries ─────────────────────────────────────
#
# Bug A fix (2026-05-19): the EP001 plan ships `asset_data.characters` as
# list[dict], e.g. `[{"char_id": "JADE", "wardrobe_phase_id": "p1"}, ...]`.
# The prior loader (formerly at lines 173-179 of this file) stringified each
# dict via str(c) and used the resulting "{'char_id': 'jade', ...}" as the
# canonical character id, which then missed every downstream resolver
# lookup. Typed entries make the schema explicit at load time; downstream
# consumers read .char_id and .wardrobe_phase_id rather than guessing.

@dataclass(frozen=True)
class CharacterEntry:
    """A single character reference inside a plan shot.

    Frozen so equality + hashability are well-defined for use as dict
    keys and in `seen` sets during deduplication. CanonicalShot uses
    `__post_init__` to re-typify after dataclasses.asdict() round-trips
    through episode_runner (see CanonicalShot.__post_init__).

    Only `char_id` is required; every other field is optional metadata
    copied verbatim from the plan's character dict by `_canonicalize_shot`.
    """
    # Character identifier. `_canonicalize_shot` stores it stripped and
    # upper-cased; instances built directly are NOT re-normalized here.
    char_id: str
    # Plan-supplied wardrobe phase (e.g. "p1" in the EP001 plan); None
    # when the plan entry has no `wardrobe_phase_id` key.
    wardrobe_phase_id: Optional[str] = None
    # Plan-supplied `emotion_keyword`, passed through unchanged.
    emotion_keyword: Optional[str] = None
    # Plan-supplied `screen_position`, passed through unchanged.
    screen_position: Optional[str] = None
    # Plan-supplied `visibility`; defaults to "in_frame" when absent.
    # NOTE(review): the set of other accepted values is not defined in
    # this module — confirm against the plan schema.
    visibility: str = "in_frame"


class PlanValidationError(ValueError):
    """Raised when a plan shot's `characters` data cannot be canonicalized.

    Raise sites in this module:
      * `_canonicalize_shot` (and therefore `load_plan`, which calls it) —
        an `asset_data.characters` entry is a dict without a `char_id`
        key, or is neither a dict nor a string.
      * `CanonicalShot.__post_init__` — a `characters` element is not a
        CharacterEntry, dict, or str.

    Subclass of ValueError so existing call sites that catch ValueError
    continue to function unless they want to special-case
    plan-validation failures.
    """


@dataclass
class CanonicalShot:
    """One plan shot in canonical, file-shape-independent form.

    Downstream consumers treat these field names as the single source of
    truth. `raw` keeps the originating plan-shot dict available for
    builders that still need un-promoted keys (e.g. `compiled_prompts`);
    prefer the typed fields whenever one exists.

    Field order is part of the constructor interface — do not reorder.
    """
    shot_id: str
    scene_index: int
    # Sequence key the shot came from; None for flat plans.
    sequence_id: Optional[str]
    # Pipeline selector, e.g. "still" / "video" / etc.
    pipeline: Optional[str]
    # Still-pass model id (carried forward from the legacy `model` key).
    previs_model: Optional[str]
    # Newer video model id; None when the plan does not specify one.
    video_model: Optional[str]
    location_id: Optional[str]
    characters: list["CharacterEntry"]
    shot_type: Optional[str]
    duration_s: Optional[float]
    is_env_only: bool
    has_dialogue: bool
    aspect_ratio: Optional[str]
    # The original plan shot dict (escape hatch).
    raw: dict[str, Any]
    # Cinema Mode Framework typed block, shaped as
    #   {"mode": "<CINEMA_MODES.yaml mode id>",
    #    "overrides": {"lens": "<catalog id>", ...}}
    # Validated at load time by _validate_cinematography_block().
    # None for legacy plans authored before the framework existed.
    cinematography: Optional[dict[str, Any]] = None
    # gpt-image-2 quality tier override per shot (low/medium/high).
    # Per-shot field in plan-pass JSON (SYNTHESIS §10). None for legacy
    # plans / non-quality-aware models. Adapter falls back to 'medium'
    # when None and model is gpt-image-2.
    quality: Optional[str] = None

    def __post_init__(self) -> None:
        """Coerce `characters` back to CharacterEntry objects.

        Needed because an asdict() → CanonicalShot(**d) round-trip turns
        each CharacterEntry into a plain dict. Empty/None values and lists
        that are already fully typed are left untouched.

        Raises:
            PlanValidationError: an element is not a CharacterEntry, dict,
                or str.
        """
        entries = self.characters
        if not entries:
            return
        # Fast path: nothing to convert, keep the caller's list object.
        if not any(not isinstance(item, CharacterEntry) for item in entries):
            return

        def _as_entry(item: object) -> "CharacterEntry":
            # Already typed — pass through unchanged.
            if isinstance(item, CharacterEntry):
                return item
            # asdict() output: keys map 1:1 onto CharacterEntry fields.
            if isinstance(item, dict):
                return CharacterEntry(**item)
            # Bare id string: normalize the same way the loader does.
            if isinstance(item, str):
                return CharacterEntry(char_id=item.strip().upper())
            raise PlanValidationError(
                f"CanonicalShot.__post_init__: characters entry must be "
                f"CharacterEntry, dict, or str, got {type(item).__name__}: {item!r}"
            )

        # Assign only after every element converted, so a failure leaves
        # the instance's `characters` as it was passed in.
        self.characters = [_as_entry(item) for item in entries]


@dataclass
class CanonicalPlan:
    """Normalized representation of an episode plan.

    `shots` is the SSOT; consumers walk this list in declaration order.
    `raw` is the parsed top-level JSON (escape hatch only).
    """
    # Taken by load_plan from the plan's `episode_id`, else `episode`,
    # else the plan file's stem.
    episode_id: str
    # The plan's `project` value; load_plan stores "" when it is absent.
    project: str
    # Canonical shots in file order (sequences flattened by load_plan).
    shots: list[CanonicalShot]
    # Path the plan was read from (after `expanduser()` in load_plan).
    source_path: Path
    # Parsed top-level JSON object; defaults to a fresh empty dict per
    # instance via default_factory.
    raw: dict[str, Any] = field(default_factory=dict)


def load_plan(plan_path: Path) -> CanonicalPlan:
    """Load and normalize a plan JSON file.

    Handles both file shapes:
      * Flat — top-level `shots: list[dict]`.
      * Sequences — top-level `sequences: dict[str, list[dict]]`.
        Sequences are flattened into a single shot list; each
        CanonicalShot retains its `sequence_id`.

    A non-empty flat `shots` list takes precedence over `sequences` when
    both are present. Sequence values that are not lists are skipped with
    a warning rather than aborting the load.

    Args:
        plan_path: Path to the plan JSON file (`~` is expanded).

    Returns:
        CanonicalPlan with normalized shots.

    Raises:
        FileNotFoundError: plan_path does not exist.
        ValueError: plan JSON is malformed, its top level is not a JSON
            object, or it contains neither shape. (json.JSONDecodeError
            and PlanValidationError are both ValueError subclasses.)
    """
    p = Path(plan_path).expanduser()
    if not p.exists():
        raise FileNotFoundError(f"plan not found: {p}")
    raw = json.loads(p.read_text(encoding="utf-8"))

    # Valid JSON may still be a list/str/number; without this guard the
    # `.get` calls below would raise AttributeError instead of the
    # documented ValueError.
    if not isinstance(raw, dict):
        raise ValueError(
            f"plan {p} top-level JSON must be an object, got "
            f"{type(raw).__name__}"
        )

    episode_id = str(raw.get("episode_id") or raw.get("episode") or p.stem)
    project = str(raw.get("project") or "")

    flat_shots = raw.get("shots")
    sequences = raw.get("sequences")

    shots: list[CanonicalShot] = []
    if isinstance(flat_shots, list) and flat_shots:
        shots = [_canonicalize_shot(s, sequence_id=None) for s in flat_shots]
    elif isinstance(sequences, dict) and sequences:
        for seq_id, seq_shots in sequences.items():
            if not isinstance(seq_shots, list):
                # Best-effort: keep loading the other sequences, but make
                # the dropped one visible instead of skipping silently.
                logger.warning(
                    "plan %s: sequence %r is %s, not a list; skipping",
                    p, seq_id, type(seq_shots).__name__,
                )
                continue
            shots.extend(
                _canonicalize_shot(s, sequence_id=str(seq_id))
                for s in seq_shots
            )
    else:
        raise ValueError(
            f"plan {p} has neither 'shots' (flat) nor 'sequences' (legacy); "
            f"top-level keys: {list(raw.keys())}"
        )

    return CanonicalPlan(
        episode_id=episode_id,
        project=project,
        shots=shots,
        source_path=p,
        raw=raw,
    )


def _canonicalize_shot(s: dict, *, sequence_id: Optional[str]) -> CanonicalShot:
    """Normalize a single raw plan_shot dict into a CanonicalShot.

    Field-mapping rules (Law 1 — single rename point):
      * `model` (legacy) → `previs_model` (still-pass model id).
      * `video_model` (new) → `video_model` (None if absent in source).
      * `routing_data.target_editorial_duration_s` → `duration_s`
        (None when absent or not convertible to float).
      * `routing_data.is_env_only` → `is_env_only`.
      * `routing_data.has_dialogue` → `has_dialogue`.
      * `asset_data.location_id` → `location_id`.
      * `asset_data.characters` → `characters` (uppercased, deduped by
        `char_id`, first occurrence wins; blank ids are dropped).
      * `prompt_data.shot_type` → `shot_type`.

    The `raw` field carries a shallow copy of the source dict so
    builders can reach `compiled_prompts`, `spatial_data`, etc.

    Args:
        s: Raw plan shot mapping.
        sequence_id: Sequence key the shot came from; None for flat plans.

    Returns:
        The canonical shot.

    Raises:
        ValueError: `s` is not a dict, or has no `shot_id` / `id`.
        PlanValidationError: `asset_data.characters` is not a list, or an
            entry is a dict without `char_id`, or is neither dict nor str.
        CinemaConfigError: propagated from
            `_validate_cinematography_block` for a bad cinematography block.
    """
    # A non-dict entry (e.g. a stray string in the shots list) would
    # otherwise surface as an opaque AttributeError on `.get`.
    if not isinstance(s, dict):
        raise ValueError(
            f"plan shot must be a mapping, got {type(s).__name__}: {s!r}"
        )

    shot_id = str(s.get("shot_id") or s.get("id") or "")
    if not shot_id:
        raise ValueError(
            f"plan shot missing shot_id (keys={list(s.keys())[:8]})"
        )
    scene_index = int(s.get("scene_index") or 0)

    routing = s.get("routing_data") or {}
    asset = s.get("asset_data") or {}
    prompt = s.get("prompt_data") or {}

    raw_chars = asset.get("characters") or []
    # Iterating a bare string would yield one bogus entry per letter and
    # iterating a dict would yield its keys — reject both at load time.
    if not isinstance(raw_chars, (list, tuple)):
        raise PlanValidationError(
            f"shot {shot_id!r}: asset_data.characters must be a list, "
            f"got {type(raw_chars).__name__}: {raw_chars!r}"
        )

    chars: list[CharacterEntry] = []
    seen: set[str] = set()
    for c in raw_chars:
        if isinstance(c, CharacterEntry):
            entry = c
        elif isinstance(c, dict):
            cid_raw = c.get("char_id")
            if cid_raw is None:
                raise PlanValidationError(
                    f"shot {shot_id!r}: characters entry dict missing "
                    f"'char_id' key: {c!r}"
                )
            cid = str(cid_raw).strip().upper()
            entry = CharacterEntry(
                char_id=cid,
                wardrobe_phase_id=c.get("wardrobe_phase_id"),
                emotion_keyword=c.get("emotion_keyword"),
                screen_position=c.get("screen_position"),
                # `or` (not a .get default) so an explicit JSON null also
                # falls back to the default instead of storing None in a
                # field typed `str`.
                visibility=c.get("visibility") or "in_frame",
            )
        elif isinstance(c, str):
            cid = c.strip().upper()
            if not cid:
                continue
            entry = CharacterEntry(char_id=cid)
        else:
            raise PlanValidationError(
                f"shot {shot_id!r}: characters entry must be dict or str, "
                f"got {type(c).__name__}: {c!r}"
            )
        # Dedupe on char_id; blank ids (e.g. char_id "  ") are dropped.
        if entry.char_id and entry.char_id not in seen:
            seen.add(entry.char_id)
            chars.append(entry)

    duration_raw = routing.get("target_editorial_duration_s")
    duration_s: Optional[float]
    try:
        duration_s = float(duration_raw) if duration_raw is not None else None
    except (TypeError, ValueError):
        # Deliberate best-effort: an unparseable duration is treated as
        # "not specified" rather than failing the whole plan.
        duration_s = None

    cinematography = _validate_cinematography_block(s.get("cinematography"), shot_id)

    return CanonicalShot(
        shot_id=shot_id,
        scene_index=scene_index,
        sequence_id=sequence_id,
        pipeline=s.get("pipeline"),
        previs_model=s.get("model"),
        video_model=s.get("video_model"),
        location_id=asset.get("location_id"),
        characters=chars,
        shot_type=prompt.get("shot_type"),
        duration_s=duration_s,
        is_env_only=bool(routing.get("is_env_only", False)),
        has_dialogue=bool(routing.get("has_dialogue", False)),
        aspect_ratio=s.get("aspect_ratio"),
        raw=dict(s),
        cinematography=cinematography,
        quality=s.get("quality"),
    )


def _validate_cinematography_block(
    block: Optional[Any], shot_id: str
) -> Optional[dict[str, Any]]:
    """Check a plan shot's optional `cinematography` block.

    Checks, in order: the block is a mapping; `mode` is a non-empty
    string naming a known mode; `overrides` (if present) is a mapping
    whose keys are allowed override fields and whose values are ids in
    the matching catalog. Failures crash at load time, in line with
    cinema_loader._validate_cinema_config (Phase 2), so config bugs do
    not wait until render time to surface.

    Args:
        block: Value of the shot's `cinematography` key, or None.
        shot_id: Shot identifier, used only in error messages.

    Returns:
        The same `block` object, unmodified, or None when `block` is None.

    Raises:
        CinemaConfigError: structural problem, unknown mode, unknown
            override field, or override id missing from its catalog.
    """
    if block is None:
        return None

    # Imported here rather than at module top: plan_loader is loaded very
    # early in process startup, while cinema_loader pulls in yaml + a
    # CONFIG_DIR stat, so legacy plans (no cinematography block) never pay
    # that cost. _MODE_CATALOG_FIELDS is deliberately reused instead of
    # redefined (Law 1 SSOT): cinema_loader owns the override-field →
    # catalog-name mapping, so a catalog added there is automatically
    # accepted as an override here without a second edit.
    from recoil.pipeline._lib.cinema_loader import (
        CinemaConfigError,
        _MODE_CATALOG_FIELDS,
        load_cinema_modes,
    )

    if not isinstance(block, dict):
        raise CinemaConfigError(
            f"shot {shot_id!r}: 'cinematography' must be a mapping, got "
            f"{type(block).__name__}"
        )

    mode_id = block.get("mode")
    has_valid_mode_id = isinstance(mode_id, str) and bool(mode_id)
    if not has_valid_mode_id:
        raise CinemaConfigError(
            f"shot {shot_id!r}: 'cinematography.mode' missing or not a "
            f"non-empty string (got {mode_id!r})"
        )

    config = load_cinema_modes()
    known_modes = config["modes"]
    catalogs = config["catalogs"]

    if mode_id not in known_modes:
        raise CinemaConfigError(
            f"shot {shot_id!r}: 'cinematography.mode' = {mode_id!r} is not "
            f"a known mode in CINEMA_MODES.yaml. Available: {sorted(known_modes.keys())}"
        )

    overrides = block.get("overrides")
    if overrides is None:
        # No overrides to check — the mode alone is valid.
        return block

    if not isinstance(overrides, dict):
        raise CinemaConfigError(
            f"shot {shot_id!r}: 'cinematography.overrides' must be a "
            f"mapping, got {type(overrides).__name__}"
        )

    for field_name, ref in overrides.items():
        if field_name not in _MODE_CATALOG_FIELDS:
            raise CinemaConfigError(
                f"shot {shot_id!r}: 'cinematography.overrides' has "
                f"unknown field {field_name!r}. "
                f"Allowed: {sorted(_MODE_CATALOG_FIELDS.keys())}"
            )
        cat_name = _MODE_CATALOG_FIELDS[field_name]
        # The str check comes first so non-string (possibly unhashable)
        # refs never reach the catalog membership test.
        ref_is_known = isinstance(ref, str) and ref in catalogs[cat_name]
        if not ref_is_known:
            raise CinemaConfigError(
                f"shot {shot_id!r}: 'cinematography.overrides.{field_name}'"
                f" = {ref!r} is not a valid id in catalog {cat_name!r}. "
                f"Available: {sorted(catalogs[cat_name].keys())}"
            )

    return block


# Names exported by `from ...plan_loader import *`.
# NOTE(review): `_validate_cinematography_block` is listed despite its
# leading underscore — presumably so other modules or tests can import
# it explicitly; confirm before removing it from this list.
__all__ = [
    "CanonicalPlan",
    "CanonicalShot",
    "CharacterEntry",
    "PlanValidationError",
    "_validate_cinematography_block",
    "load_plan",
]
