"""look_loader.py — Look & Identity SSOT registries + LookBundle resolver.

Single source of truth loader for the in-repo Look and Identity objects that
drive the krea2-flora image pipeline. A *Look* is a series/house-scoped style
definition (palette, style reference images, look-pack phrasing, a FK into the
cinema-mode cinematography SSOT). An *Identity* is a recurring subject
(character) with a set of identity reference images and an optional textual
trigger anchor.

This module is PURE DATA + LOGIC — it does NOT generate anything and makes no
API calls. Phase 2 wires the resolver output (`LookBundle`) into
`execute_keyframe`; Phase 3 wires the per-project `ctx.project_config["look"]`
/ `["identities"]` selection.

REF-ROOT CONVENTION
-------------------
Look `style_refs[].path` and Identity `ref_set[].path` are resolved relative
to ``recoil/config/`` (``CONFIG_DIR``). This keeps the SSOT self-contained:
the Look/Identity YAMLs live under ``recoil/config/looks`` and
``recoil/config/identities``, and their referenced images live under
``recoil/config/looks/<look_id>/...`` and
``recoil/config/refs/characters/<identity_id>/...`` — all committed together,
with NO dependency on a per-project ``projects_root()``. ``load_registries()``
validates that every referenced ref file actually exists on disk under this
root (fail fast).

VALIDATION POLICY (load_registries — fail fast)
-----------------------------------------------
  * ``schema_version`` present on every Look and Identity.
  * FK ``extends_cinema_mode`` exists in CINEMA_MODES.yaml ``modes:``.
  * No duplicate ids (id == filename stem; cross-checked).
  * Every referenced ref file exists on disk under CONFIG_DIR.
Any failure raises (``LookRegistryError``). Cached after first success.

BACKING / LoRA
--------------
v1 is reference-first. ``backing`` is always ``"references"`` and ``loras`` is
always ``[]``. The LoRA branch keys off the target model's ``supports_lora``
profile flag, which is ``false`` everywhere in v1 — so the branch is DEAD. It
is implemented only as a guard for the v2 promotion slot; no real LoRA logic
exists here.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
import logging
from pathlib import Path
from typing import Optional

import yaml

from recoil.core.paths import CONFIG_DIR

# Module logger; used to warn when a target model has no model_profiles entry.
logger = logging.getLogger(__name__)

# Conservative ref budget used when a target model carries NO model_profiles
# entry (e.g. the Google keyframe aliases 'nbp' / 'flash', which live in
# provider_strategy.json but have no profile). Lower bound most image models
# accept; tune by adding a real profile entry with explicit caps.
# Returned by _load_model_caps() as max_reference_images for such models.
_DEFAULT_MAX_REFS = 4

# --------------------------------------------------------------------------- #
# Ref-root + config locations
# --------------------------------------------------------------------------- #

# Ref paths in Look.style_refs[].path and Identity.ref_set[].path resolve
# against CONFIG_DIR (recoil/config/). See module docstring.
REF_ROOT = CONFIG_DIR
# Directories globbed for ``*.yaml`` registry files by load_registries().
LOOKS_DIR = CONFIG_DIR / "looks"
IDENTITIES_DIR = CONFIG_DIR / "identities"
# Cinema-mode definitions; its ``modes:`` keys are the valid FK targets for
# a Look's ``extends_cinema_mode``.
_CINEMA_PATH = CONFIG_DIR / "CINEMA_MODES.yaml"
# Per-model capability profiles read by _load_model_caps().
_PROFILES_PATH = CONFIG_DIR / "model_profiles.json"


class LookRegistryError(Exception):
    """Signal that a Look/Identity registry input failed validation.

    The loading helpers in this module raise it as soon as a problem is
    detected (fail fast) instead of continuing with partial data.
    """


# --------------------------------------------------------------------------- #
# Resolver output dataclasses
# --------------------------------------------------------------------------- #


@dataclass
class ResolvedRef:
    """One reference image selected for a generation call."""

    # Path string exactly as authored in the YAML (relative to CONFIG_DIR);
    # it is NOT expanded to an absolute path here.
    path: str
    # "identity" for character refs, "style" for look refs.
    role: str
    # Ordering key for budgeting: lower numbers are kept first when the ref
    # list has to be truncated.
    priority: int


@dataclass
class LookBundle:
    """Fully resolved Look + Identity payload for prompt/ref assembly.

    This is the value Phase 2's ``apply_look(prompt, bundle)`` consumes. Both
    ref lists have already been cut down to the target model's capacity by the
    time a bundle is built.
    """

    # Resolved FK into the cinema-mode SSOT; render_cinema_tokens() sources from this.
    cinema_mode_id: str
    # Style refs, budgeted to whatever ref slots remain after identity refs.
    style_refs: list[ResolvedRef]
    # Identity refs, budgeted to max_character_refs and placed first.
    identity_refs: list[ResolvedRef]
    # The Look's palette block, if it defines one.
    palette: Optional[dict]
    # Look-pack phrasing: {positive: [...], avoid: [...]}.
    look_pack: dict
    # Textual trigger anchors collected from the identities.
    triggers: list[str]
    # The Look's default aspect.
    aspect: str
    # The Look's creativity setting, if any.
    creativity: Optional[str]
    # "references" (always in v1) | "lora" (v2).
    backing: str
    # Always [] in v1.
    loras: list[dict]
    # {max_refs, max_character_refs, used_identity, used_style, truncated: [...]}.
    ref_budget: dict
    # {look_id, identity_ids: [...]}.
    provenance: dict


# --------------------------------------------------------------------------- #
# Registry cache
# --------------------------------------------------------------------------- #

# Memoized result of load_registries() for the default (committed) registry
# locations. ``None`` means "not loaded yet"; reload_registries() resets both.
_looks_cache: Optional[dict] = None
_identities_cache: Optional[dict] = None


def reload_registries() -> None:
    """Forget the memoized registries (used by tests and dev reloads).

    After this call both module-level cache slots are ``None``, so the next
    cached ``load_registries()`` call has to rebuild them.
    """
    global _looks_cache, _identities_cache
    _looks_cache = _identities_cache = None


# --------------------------------------------------------------------------- #
# Low-level helpers
# --------------------------------------------------------------------------- #


def _load_cinema_mode_ids(cinema_path: Path = _CINEMA_PATH) -> set[str]:
    """Return the set of valid cinema mode ids from CINEMA_MODES.yaml.

    The ids are the keys of the file's top-level ``modes:`` mapping. An empty
    file or a missing/empty ``modes:`` key yields an empty set.

    Raises:
        LookRegistryError: if the file is missing, is not valid YAML, or its
            top level / ``modes:`` value is not a mapping.
    """
    try:
        # Explicit UTF-8 so parsing does not depend on the host locale.
        text = cinema_path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise LookRegistryError(f"CINEMA_MODES.yaml not found at {cinema_path}") from exc

    try:
        data = yaml.safe_load(text) or {}
    except yaml.YAMLError as exc:
        # Keep the registry's single failure type instead of leaking YAMLError.
        raise LookRegistryError(f"{cinema_path.name}: invalid YAML — {exc}") from exc
    if not isinstance(data, dict):
        raise LookRegistryError(f"{cinema_path.name}: top-level YAML must be a mapping")

    modes = data.get("modes") or {}
    if not isinstance(modes, dict):
        raise LookRegistryError(f"{cinema_path.name}: 'modes' must be a mapping keyed by mode id")
    return set(modes)


def _glob_yaml(directory: Path) -> list[Path]:
    """List the ``*.yaml`` files directly inside ``directory``, sorted.

    A path that is missing or is not a directory produces an empty list
    rather than an error. Only the ``.yaml`` suffix is matched, and the glob
    is not recursive.
    """
    return sorted(directory.glob("*.yaml")) if directory.is_dir() else []


def _parse_yaml_file(path: Path) -> dict:
    """Read one registry YAML file and return its top-level mapping.

    Raises:
        LookRegistryError: if the file cannot be read or decoded as UTF-8, is
            not valid YAML, or its top-level value is not a mapping (an empty
            file parses to ``None`` and is rejected the same way).
    """
    try:
        # Explicit UTF-8 so non-ASCII phrasing does not depend on the locale.
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # Surface I/O problems through the registry's single failure type.
        raise LookRegistryError(f"{path.name}: cannot read file — {exc}") from exc
    try:
        data = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        raise LookRegistryError(f"{path.name}: invalid YAML — {exc}") from exc
    if not isinstance(data, dict):
        raise LookRegistryError(f"{path.name}: top-level YAML must be a mapping")
    return data


def _validate_ref_exists(rel_path: str, owner_id: str, ref_root: Path) -> None:
    """Raise unless ``rel_path`` names an existing file located under ``ref_root``.

    Args:
        rel_path: Ref path as authored in the YAML, relative to ``ref_root``.
        owner_id: Human-readable owner label used as the error-message prefix.
        ref_root: Directory that ref paths are resolved against.

    Raises:
        LookRegistryError: if the path is empty, is not a string/path value,
            is absolute or climbs out of ``ref_root`` via ``..``, or does not
            point at an existing regular file.
    """
    import os.path  # local import: the module's import block does not pull in os

    if not rel_path:
        raise LookRegistryError(f"{owner_id}: empty ref path")
    if not isinstance(rel_path, (str, os.PathLike)):
        raise LookRegistryError(f"{owner_id}: ref path must be a string — got {rel_path!r}")

    # Containment is checked lexically (after collapsing "a/../b") so that
    # symlinked ref files inside the root keep working, while absolute paths
    # and "../" escapes are rejected instead of silently validating a file
    # outside the ref root.
    normalized = Path(os.path.normpath(rel_path))
    if normalized.anchor or normalized.parts[:1] == ("..",):
        raise LookRegistryError(
            f"{owner_id}: ref path escapes ref root — {rel_path} "
            f"(must be relative to {ref_root})"
        )

    resolved = (ref_root / rel_path).resolve()
    if not resolved.is_file():
        raise LookRegistryError(
            f"{owner_id}: ref file missing on disk — {rel_path} "
            f"(resolved {resolved} under ref root {ref_root})"
        )


# --------------------------------------------------------------------------- #
# Registry loading + validation
# --------------------------------------------------------------------------- #


def _load_registry_dir(
    directory: Path,
    *,
    kind: str,
    id_key: str,
    refs_key: str,
    ref_root: Path,
    valid_modes: Optional[set] = None,
) -> dict:
    """Parse and validate every ``*.yaml`` entry of one registry directory.

    Args:
        directory: Directory holding the registry YAML files.
        kind: Label used in error messages (``"look"`` / ``"identity"``).
        id_key: Key holding the entry id (must equal the filename stem).
        refs_key: Key holding the list of ref mappings to check on disk.
        ref_root: Root that ref paths are resolved against.
        valid_modes: When given, ``extends_cinema_mode`` must be a member.

    Returns:
        Dict of validated entries keyed by id.

    Raises:
        LookRegistryError: on the first validation failure.
    """
    entries: dict = {}
    for path in _glob_yaml(directory):
        stem = path.stem
        entry = _parse_yaml_file(path)
        if "schema_version" not in entry:
            raise LookRegistryError(f"{kind} '{stem}': missing schema_version")
        entry_id = entry.get(id_key)
        if entry_id != stem:
            raise LookRegistryError(
                f"{kind} file '{path.name}': {id_key}={entry_id!r} must equal filename stem {stem!r}"
            )
        if entry_id in entries:
            raise LookRegistryError(f"duplicate {id_key}: {entry_id!r}")
        if valid_modes is not None:
            fk = entry.get("extends_cinema_mode")
            if fk not in valid_modes:
                raise LookRegistryError(
                    f"{kind} '{entry_id}': extends_cinema_mode={fk!r} not a valid cinema mode "
                    f"(known: {sorted(valid_modes)})"
                )

        owner = f"{kind} '{entry_id}'"
        refs = entry.get(refs_key) or []
        if not isinstance(refs, list):
            raise LookRegistryError(f"{owner}: {refs_key} must be a list of mappings")
        for ref in refs:
            # A bare string (or any non-mapping) item would otherwise blow up
            # with AttributeError on .get(); report it as a registry error.
            if not isinstance(ref, dict):
                raise LookRegistryError(
                    f"{owner}: each {refs_key} entry must be a mapping with a 'path' — got {ref!r}"
                )
            _validate_ref_exists(ref.get("path", ""), owner, ref_root)
        entries[entry_id] = entry
    return entries


def load_registries(
    *,
    looks_dir: Path = LOOKS_DIR,
    identities_dir: Path = IDENTITIES_DIR,
    cinema_path: Path = _CINEMA_PATH,
    ref_root: Path = REF_ROOT,
    use_cache: bool = True,
) -> tuple[dict, dict]:
    """Glob, parse, validate, and cache the Look + Identity registries.

    Returns ``(looks_dict, identities_dict)`` keyed by id. Raises
    ``LookRegistryError`` on ANY failure (fail fast):
      * schema_version missing on a Look/Identity,
      * id != filename stem, or duplicate ids,
      * FK ``extends_cinema_mode`` not in CINEMA_MODES.yaml,
      * a ``style_refs`` / ``ref_set`` value that is not a list of mappings,
      * a referenced ref file missing on disk under ``ref_root``.

    The default arguments point at the committed registries; tests inject
    ``tmp_path``-rooted dirs for self-contained fixtures. The cache is only
    read and written when ``use_cache`` is true AND every location argument
    is left at its default, so injected fixtures never pollute it.
    """
    global _looks_cache, _identities_cache
    using_defaults = (
        looks_dir == LOOKS_DIR
        and identities_dir == IDENTITIES_DIR
        and cinema_path == _CINEMA_PATH
        and ref_root == REF_ROOT
    )
    if use_cache and using_defaults and _looks_cache is not None:
        return _looks_cache, _identities_cache  # type: ignore[return-value]

    valid_modes = _load_cinema_mode_ids(cinema_path)

    looks = _load_registry_dir(
        looks_dir,
        kind="look",
        id_key="look_id",
        refs_key="style_refs",
        ref_root=ref_root,
        valid_modes=valid_modes,
    )
    # Identities carry no cinema-mode FK, so valid_modes is not passed.
    identities = _load_registry_dir(
        identities_dir,
        kind="identity",
        id_key="identity_id",
        refs_key="ref_set",
        ref_root=ref_root,
    )

    if use_cache and using_defaults:
        _looks_cache = looks
        _identities_cache = identities

    return looks, identities


# --------------------------------------------------------------------------- #
# Resolver lookups (Phase 3 wires ctx.project_config; here just the lookup)
# --------------------------------------------------------------------------- #


class ProjectBindingError(Exception):
    """Signal an invalid Look/cinema_mode binding in a project_config.

    Phase 3 has exactly one hard rule: a project sets ``look`` OR a
    project-level ``cinema_mode`` — not both. Because a Look already names its
    cinema mode through ``extends_cinema_mode`` (the resolver derives the
    effective mode from the bound look), configuring both is contradictory.
    """


def validate_project_binding(config: Optional[dict]) -> None:
    """Check the **`look` XOR project-level `cinema_mode`** rule on a project_config.

    Outcomes by which of the two keys carry a truthy value:

      * both            → ``ProjectBindingError`` (the Look already carries
                          ``extends_cinema_mode``, so the pair is contradictory).
      * ``look`` only   → accepted; the effective cinema mode comes from
                          ``look.extends_cinema_mode`` downstream.
      * ``cinema_mode`` only → accepted; legacy behavior, unchanged.
      * neither         → accepted; byte-identical to today.

    ``look`` and ``identities`` are optional — their absence is the common
    case for existing projects and stays valid (back-compat). Nothing is
    resolved against the registry here (``resolve_look`` does that); only the
    shape rule is enforced, so a bad binding surfaces at config-load time
    instead of at first render.

    A ``None`` or otherwise non-dict ``config`` is silently accepted.
    """
    if not isinstance(config, dict):
        return

    look = config.get("look")
    # Falsy values (e.g. an empty string) count as "not set", matching
    # load_project_config's skip-empty-string merge policy.
    cinema_mode = config.get("cinema_mode")
    if not (look and cinema_mode):
        return

    raise ProjectBindingError(
        "project_config sets BOTH 'look' "
        f"({look!r}) and project-level 'cinema_mode' "
        f"({cinema_mode!r}). A Look already carries "
        "'extends_cinema_mode', so setting both is contradictory — set "
        "EITHER a 'look' OR a 'cinema_mode', never both."
    )


def resolve_look(ctx) -> Optional[dict]:
    """Return the Look dict selected by ``ctx.project_config["look"]``.

    The configured value is treated as a look_id and looked up in the loaded
    registry. ``None`` comes back when no look is configured (missing
    ``project_config``, missing/falsy ``look``) or when the id is not in the
    registry. The registry is only loaded once a look_id is actually present.
    """
    selected = _ctx_get(ctx, "look")
    if selected:
        registry = load_registries()[0]
        return registry.get(selected)
    return None


def resolve_identity(ctx, character) -> Optional[dict]:
    """Return the Identity dict bound to ``character`` for this context.

    ``ctx.project_config["identities"]`` may take two shapes:

      * a dict mapping character key → identity_id, or
      * a list/tuple/set of identity_ids, in which case ``character`` itself
        is the identity_id when it is a member.

    Any other shape, a missing/empty setting, an unmapped character, or an id
    absent from the registry all produce ``None``.
    """
    configured = _ctx_get(ctx, "identities")
    if not configured:
        return None
    registry = load_registries()[1]

    if isinstance(configured, dict):
        chosen = configured.get(character)
    elif isinstance(configured, (list, tuple, set)) and character in configured:
        # Collection form: membership means the character key IS the id.
        chosen = character
    else:
        chosen = None

    return registry.get(chosen) if chosen else None


def _ctx_get(ctx, key: str):
    """Read ``project_config[key]`` from ``ctx`` without ever raising on shape.

    ``ctx`` may expose ``project_config`` as an attribute or, when ``ctx`` is
    itself a dict, as a key. If no dict-valued ``project_config`` can be found
    the result is ``None``; otherwise it is ``project_config.get(key)``.
    """
    project_config = getattr(ctx, "project_config", None)
    if project_config is None and isinstance(ctx, dict):
        # Plain-dict contexts carry the config as a key rather than an attribute.
        project_config = ctx.get("project_config")
    return project_config.get(key) if isinstance(project_config, dict) else None


# --------------------------------------------------------------------------- #
# Model-profile cap lookup
# --------------------------------------------------------------------------- #


def _load_model_caps(
    target_model: str, profiles_path: Path = _PROFILES_PATH
) -> tuple[int, int | None, bool]:
    """Return (max_reference_images, max_character_refs, supports_lora) for a model.

    Reads model_profiles.json raw (no cross-config validation) so the loader
    stays self-contained. Missing/absent ``max_reference_images`` defaults to 0.
    ``max_character_refs`` distinguishes THREE states, which matter for budgeting:
      * an explicit integer (e.g. 4) → a hard identity sub-cap;
      * an explicit ``0`` → this model takes NO identity refs;
      * ``null``/absent → ``None``, meaning "no separate identity sub-cap" — the
        identity refs may use the full ``max_reference_images`` budget. Most
        models in model_profiles.json leave this null, so coalescing null→0 here
        would silently drop EVERY identity ref for them (the look's whole point).
    Missing ``supports_lora`` defaults to False.

    Negative caps are clamped to 0: callers use the caps as slice bounds, where
    a negative value would mean "drop from the end" rather than "keep none".

    Raises:
        LookRegistryError: if the profiles file is missing, is not valid JSON,
            is not a JSON object at the top level, or a cap is not an integer.
    """
    try:
        # Explicit UTF-8 so decoding does not depend on the host locale.
        text = profiles_path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        raise LookRegistryError(f"model_profiles.json not found at {profiles_path}") from exc
    try:
        profiles = json.loads(text)
    except json.JSONDecodeError as exc:
        raise LookRegistryError(f"{profiles_path.name}: invalid JSON — {exc}") from exc
    if not isinstance(profiles, dict):
        raise LookRegistryError(f"{profiles_path.name}: top-level JSON must be an object")

    prof = profiles.get(target_model)
    if not isinstance(prof, dict):
        # Some real keyframe models (notably the Google aliases 'nbp' / 'flash')
        # are routed via provider_strategy.json but carry NO model_profiles
        # entry. Raising here is silently swallowed by execute_keyframe's
        # best-effort look-resolution guard, which would drop the ENTIRE Look on
        # the most common keyframe model. Degrade gracefully instead: a
        # conservative ref budget (max_char=None → no identity sub-cap) so the
        # Look's prompt fragments + aspect always apply and a few refs ride
        # along. Loud warning so the missing profile gets noticed + tuned.
        logger.warning(
            "look_loader: no model_profiles entry for target_model %r — using "
            "conservative ref budget (max_refs=%d, no identity sub-cap). Add a "
            "model_profiles entry with explicit caps to tune.",
            target_model,
            _DEFAULT_MAX_REFS,
        )
        return _DEFAULT_MAX_REFS, None, False

    raw_char = prof.get("max_character_refs")
    try:
        max_refs = max(0, int(prof.get("max_reference_images") or 0))
        # Keep None distinct from 0 — see the three states described above.
        max_char = max(0, int(raw_char)) if raw_char is not None else None
    except (TypeError, ValueError) as exc:
        raise LookRegistryError(
            f"{profiles_path.name}: non-integer ref cap for model {target_model!r} — {exc}"
        ) from exc
    supports_lora = bool(prof.get("supports_lora", False))
    return max_refs, max_char, supports_lora


# --------------------------------------------------------------------------- #
# Bundle assembly
# --------------------------------------------------------------------------- #


def _to_resolved(ref: dict, role: str) -> ResolvedRef:
    """Convert one authored ref mapping into a ``ResolvedRef``.

    Args:
        ref: Ref mapping from a Look's ``style_refs`` or an Identity's
            ``ref_set``; reads its ``path``, ``role`` and ``priority`` keys.
        role: Role used when the mapping does not supply one.

    A missing OR explicitly null ``priority`` sorts last (999), and a missing,
    null or empty ``role`` falls back to the ``role`` argument — an explicit
    YAML ``null`` is treated the same as an absent key instead of crashing
    ``int(None)`` or producing ``role=None``.
    """
    raw_priority = ref.get("priority")
    return ResolvedRef(
        path=ref.get("path", ""),
        role=ref.get("role") or role,
        priority=999 if raw_priority is None else int(raw_priority),
    )


def _truncation_record(ref: ResolvedRef, reason: str) -> dict:
    """Describe one dropped ref for ``ref_budget["truncated"]``."""
    return {"path": ref.path, "role": ref.role, "priority": ref.priority, "reason": reason}


def build_look_bundle(
    look: dict,
    identities: list[dict],
    target_model: str,
    shot=None,
    *,
    profiles_path: Path = _PROFILES_PATH,
) -> LookBundle:
    """Assemble a budgeted ``LookBundle`` for a target model.

    Args:
        look: Look dict (as returned by the registry).
        identities: Identity dicts to include; ``None`` entries are skipped so
            the result of an unresolved lookup can be passed straight through.
        target_model: Key into model_profiles.json used to look up ref caps.
        shot: Accepted for interface compatibility; not read by this function.
        profiles_path: Location of model_profiles.json.

    Budgeting:
      * Identity refs come FIRST, capped at the model's ``max_character_refs``
        (or at the full ref budget when the model has no such sub-cap).
      * Style refs fill the remainder up to ``max_reference_images``.
      * Overflow is truncated by ``priority`` (lower number kept first) and
        the dropped refs are recorded in ``ref_budget.truncated`` together
        with the cap that actually caused the drop.

    ``backing`` is always ``"references"`` in v1; ``loras`` is always ``[]``.
    The ``supports_lora`` branch is a DEAD guard for the v2 promotion slot
    (no model sets supports_lora=true in v1).
    """
    identities = [ident for ident in (identities or []) if ident is not None]
    max_refs, max_char, supports_lora = _load_model_caps(target_model, profiles_path)

    # The caps are used as slice bounds below; a negative value would slice
    # from the END of the list instead of keeping nothing, so clamp to 0.
    max_refs = max(0, max_refs)
    if max_char is not None:
        max_char = max(0, max_char)

    truncated: list[dict] = []

    # --- identity refs first (cap at max_character_refs) ---
    identity_ids: list[str] = [ident.get("identity_id") for ident in identities]
    id_refs_all: list[ResolvedRef] = sorted(
        (
            _to_resolved(ref, "identity")
            for ident in identities
            for ref in ident.get("ref_set") or []
        ),
        key=lambda r: r.priority,
    )

    # A null/absent max_character_refs means "no separate identity sub-cap":
    # identities may use the full ref budget. An explicit 0 means "no identity
    # refs at all"; any other explicit int is the identity sub-cap.
    if max_char is None:
        id_cap, id_reason = max_refs, "max_reference_images"
    else:
        id_cap, id_reason = max_char, "max_character_refs"
    identity_refs = id_refs_all[:id_cap]
    truncated.extend(_truncation_record(r, id_reason) for r in id_refs_all[id_cap:])

    # Identity refs are also bounded by the overall ref budget.
    if len(identity_refs) > max_refs:
        truncated.extend(
            _truncation_record(r, "max_reference_images") for r in identity_refs[max_refs:]
        )
        identity_refs = identity_refs[:max_refs]

    # --- style refs fill the remainder up to max_reference_images ---
    style_refs_all: list[ResolvedRef] = sorted(
        (_to_resolved(ref, "style") for ref in (look.get("style_refs") or [])),
        key=lambda r: r.priority,
    )
    remaining = max(0, max_refs - len(identity_refs))
    style_refs = style_refs_all[:remaining]
    truncated.extend(
        _truncation_record(r, "max_reference_images") for r in style_refs_all[remaining:]
    )

    # --- triggers (identity textual anchors) ---
    triggers: list[str] = [
        ident.get("trigger") for ident in identities if ident.get("trigger")
    ]

    # --- DEAD LoRA guard (v2 promotion slot) ---
    backing = "references"
    loras: list[dict] = []
    if supports_lora:  # pragma: no cover — false everywhere in v1
        backing = "lora"
        # v2: populate loras from look['style_lora'] / identity['lora'].

    ref_budget = {
        "max_refs": max_refs,
        "max_character_refs": max_char,
        "used_identity": len(identity_refs),
        "used_style": len(style_refs),
        "truncated": truncated,
    }

    return LookBundle(
        cinema_mode_id=look.get("extends_cinema_mode"),
        style_refs=style_refs,
        identity_refs=identity_refs,
        palette=look.get("palette"),
        look_pack=look.get("look_pack") or {"positive": [], "avoid": []},
        triggers=triggers,
        aspect=look.get("aspect_default"),
        creativity=look.get("creativity"),
        backing=backing,
        loras=loras,
        ref_budget=ref_budget,
        provenance={"look_id": look.get("look_id"), "identity_ids": identity_ids},
    )