"""Transport-agnostic workspace read models."""

from __future__ import annotations

import json
import logging
import re
from pathlib import Path
from typing import Any

from recoil.api.schemas.atom import (
    AtomModel,
    AtomVersionModel,
    ContinuityReport,
    WardrobeMismatch,
)
from recoil.api.schemas.board import (
    BeatRefModel,
    BoardDetail,
    BoardModel,
    BoardProvenance,
    StoryGateSummary,
)
from recoil.api.schemas.composition import CompositionManifest
from recoil.api.schemas.scene_version import (
    SceneVersionEntryModel,
    SceneVersionsModel,
)
from recoil.core.paths import ProjectPaths
from recoil.pipeline.core.persistence import (
    active_scene_body_path,
    active_version,
    list_scenes,
    load_manifest,
    load_scene_active,
    load_scene_active_with_version,
)
from recoil.pipeline.core.take import (
    Beat,
    Scene,
    Take,
    atom_urn,
    parse_atom_urn,
    parse_atom_version_urn,
)
from recoil.pipeline.core.take import (
    atom_version_urn as build_atom_version_urn,
)
from recoil.pipeline.orchestrator.batch_selector import resolve

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Captures the integer N of a ``_vN`` marker that sits immediately before a trailing
# ``.png`` extension (matched case-insensitively), e.g. ``..._v3.png`` → ``3``.
_ARTIFACT_VERSION_RE = re.compile(r"_v(\d+)(?=\.png$)", re.IGNORECASE)


def get_board(any_id: str, project: str) -> BoardModel:
    """Return the pointer-faithful selector board projection.

    ``any_id`` is resolved (via ``resolve``) to a beat ref; the on-disk storyboard
    candidates for that ref's selector are listed, and the board pointer is read from
    the FIRST beat of the scene's ACTIVE version. The pointed artifact's sidecar is then
    loaded for the detail view.

    No-data branch: when the active scene has no beats, the first beat has no board
    pointer, or the pointer carries no ``artifact``, the returned model has
    ``board_artifact`` / ``board`` / ``provenance`` set to ``None``, still lists the
    discovered ``candidates`` / ``versions``, and carries ``reason="no_board_pointer"``.

    ``newer_unpointed_versions`` counts discovered versions strictly greater than the
    version parsed from the pointed artifact name; it is 0 when that name carries no
    parsable version.
    """
    ref = resolve(any_id, project)
    ref_model = BeatRefModel.from_ref(ref)
    paths = ProjectPaths.for_project(ref.project)
    project_root = paths.project_root
    candidates, versions = _candidate_versions(paths, ref.selector, ref.episode)

    # REC-231 Phase 4: resolve the ACTIVE version body via the pointer, not the
    # raw flat path — so a conform/revert changes what the board read-model shows.
    scene = load_scene_active(ref.project, ref.episode, ref.scene_id)
    # A scene without beats has nowhere to hang a board pointer: treat it as the
    # ordinary no-pointer case instead of failing with IndexError on beats[0].
    pointer = scene.beats[0].board if scene.beats else None
    if pointer is None or not pointer.get("artifact"):
        return BoardModel(
            ref=ref_model,
            board_artifact=None,
            board=None,
            provenance=None,
            candidates=candidates,
            versions=versions,
            newer_unpointed_versions=0,
            reason="no_board_pointer",
        )

    board_artifact = pointer["artifact"]
    pointed_version = _parse_version(board_artifact)
    provenance = BoardProvenance(
        status=pointer.get("status"),
        approved_by=pointer.get("approved_by"),
        source_sha256=pointer.get("source_sha256"),
        version=pointed_version,
        updated_at=pointer.get("updated_at"),
        gate=StoryGateSummary.from_gate(pointer.get("story_gate")),
    )
    board, detail_status = _load_pointed_detail(project_root, ref.selector, board_artifact)
    # Without a parsable pointed version there is nothing to compare against.
    newer_unpointed_versions = (
        sum(1 for version in versions if version > pointed_version)
        if pointed_version is not None
        else 0
    )
    return BoardModel(
        ref=ref_model,
        board_artifact=board_artifact,
        board=board,
        provenance=provenance,
        candidates=candidates,
        versions=versions,
        newer_unpointed_versions=newer_unpointed_versions,
        detail_status=detail_status,
    )


def get_scene_versions(any_id: str, project: str) -> SceneVersionsModel:
    """Project the scene-VERSION view for a selector (REC-231), pointer-faithfully.

    Read-only: nothing is written and the pointer is never moved.

    * Versioned batch (a manifest exists): ``active_version`` comes from the manifest
      pointer, every entry under ``manifest["versions"]`` is projected, and
      ``newer_unpointed_versions`` counts entries whose version exceeds the pointer.
      Being the newest version never makes a version active — only the pointer does.
    * Flat batch (no manifest): projected as one pinned v1 entry
      (approved / derived / reshoot) whose ``artifact`` is the active body's file name
      and whose ``shot_count`` is the number of beats in the live body.
    """
    ref = resolve(any_id, project)
    scene_key = (ref.project, ref.episode, ref.scene_id)
    manifest = load_manifest(*scene_key)

    if manifest is not None:
        pointed = active_version(manifest)
        entries = [SceneVersionEntryModel(**raw_entry) for raw_entry in manifest["versions"]]
        ahead_of_pointer = len([entry for entry in entries if entry.version > pointed])
        return SceneVersionsModel(
            active_version=pointed,
            newer_unpointed_versions=ahead_of_pointer,
            versions=entries,
        )

    # Flat Case-A batch: no manifest means the v1-active default. The values below are
    # PINNED so every flat-v1 projection agrees; only shot_count reflects the live body.
    live_scene = load_scene_active(*scene_key)
    body_name = active_scene_body_path(*scene_key).name
    flat_entry = SceneVersionEntryModel(
        version=1,
        state="approved",
        downstream="derived",
        kind="reshoot",
        artifact=body_name,
        shot_count=len(live_scene.beats),
        structure_fingerprint=None,
    )
    return SceneVersionsModel(
        active_version=1,
        newer_unpointed_versions=0,
        versions=[flat_entry],
    )


def _load_pointed_detail(
    project_root: Path,
    selector: str,
    board_artifact: str,
) -> tuple[BoardDetail | None, str | None]:
    """Load the sidecar JSON of the pointed board artifact as a ``BoardDetail``.

    Returns ``(detail, None)`` on success, otherwise ``(None, status)`` where status is:

    * ``"pointed_sidecar_corrupt"`` — the artifact path is rejected as unsafe, the file
      cannot be read / decoded, or the decoded JSON is not an object;
    * ``"pointed_sidecar_missing"`` — the sidecar path does not exist.

    Every failure is logged as a warning whose message is the status string, with the
    selector and artifact attached as ``extra``.
    """
    sidecar_path = _safe_project_sidecar_path(project_root, board_artifact)
    payload = None
    if sidecar_path is None:
        failure = "pointed_sidecar_corrupt"
    elif not sidecar_path.exists():
        failure = "pointed_sidecar_missing"
    else:
        try:
            with sidecar_path.open("r", encoding="utf-8") as handle:
                payload = json.load(handle)
        except (json.JSONDecodeError, OSError, ValueError):
            failure = "pointed_sidecar_corrupt"
        else:
            # Valid JSON that is not an object is just as unusable as undecodable JSON.
            failure = None if isinstance(payload, dict) else "pointed_sidecar_corrupt"

    if failure is None:
        return BoardDetail.from_sidecar(payload), None

    logger.warning(
        failure,
        extra={"selector": selector, "artifact": board_artifact},
    )
    return None, failure


def _safe_project_sidecar_path(project_root: Path, board_artifact: str) -> Path | None:
    artifact_path = Path(board_artifact)
    if artifact_path.is_absolute() or ".." in artifact_path.parts:
        return None
    return project_root / f"{artifact_path.as_posix()}.json"


def _candidate_versions(
    paths: ProjectPaths,
    selector: str,
    episode: int,
) -> tuple[list[str], list[int]]:
    """List the on-disk board candidates for ``selector`` in an episode.

    Scans the episode's storyboards directory for regular files named
    ``{selector}_v{N}.png.json`` (sidecars). For each one, the artifact is the sidecar
    path minus its ``.json`` suffix, expressed relative to the project root; sidecars
    whose artifact cannot be made project-relative are skipped.

    Returns two parallel lists ``(artifacts, versions)`` ordered by ascending version,
    or ``([], [])`` when the storyboards directory does not exist.
    """
    storyboards_dir = paths.episode_storyboards_dir(episode)
    if not storyboards_dir.is_dir():
        return [], []

    sidecar_re = re.compile(rf"^{re.escape(selector)}_v(\d+)\.png\.json$")
    found: list[tuple[int, str]] = []
    for path in storyboards_dir.iterdir():
        if not path.is_file():
            continue
        match = sidecar_re.match(path.name)
        if not match:
            continue
        # Drop only the trailing ".json": "x_v1.png.json" -> "x_v1.png".
        artifact = _project_relative(paths.project_root, path.with_suffix(""))
        if artifact is None:
            continue
        found.append((int(match.group(1)), artifact))

    # iterdir() yields entries in arbitrary order. Sorting on the whole
    # (version, artifact) tuple keeps equal versions (e.g. "_v1" and "_v01") in a
    # deterministic order instead of whatever order the filesystem produced.
    found.sort()
    return [artifact for _, artifact in found], [version for version, _ in found]


def _project_relative(project_root: Path, path: Path) -> str | None:
    try:
        rel = path.relative_to(project_root)
    except ValueError:
        return None
    if ".." in rel.parts:
        return None
    return rel.as_posix()


def _parse_version(artifact: Any) -> int | None:
    """Extract the integer version from an artifact name, or ``None``.

    ``None`` is returned for non-string input and for strings in which
    ``_ARTIFACT_VERSION_RE`` finds no version marker.
    """
    if isinstance(artifact, str):
        found = _ARTIFACT_VERSION_RE.search(artifact)
        if found:
            return int(found.group(1))
    return None


# ──────────────────────────────────────────────────────────────────────
# REC-235 Phase 0: atom-version index projection (atom_read_model).
#
# Address atom:{episode}/beat/{beat_id}@tN → artifact + continuity facets,
# resolved against the ACTIVE persisted Scene→Beat→Take tree (the REC-231
# canon pointer), batch-board-INDEPENDENT. Pure projection — no writes, no
# pointer moves, no DB — consistent with get_board / get_scene_versions.
#
# No-data convention (mirrors get_board exactly): a not-found beat / take
# index / non-addressable episode returns the typed model carrying a
# non-empty `reason`. A malformed URN raises ValueError at parse time
# (fail-loud, via take.parse_atom_*). An episode-non-unique beat_id is a
# data-integrity violation → ValueError (never silently resolved to first).
# ──────────────────────────────────────────────────────────────────────

# The URN episode token is EP{NNN} (e.g. EP001).
# persistence._canonical_episode_token (persistence.py:151) accepts ONLY an int /
# digit-string / ep_NNN and RAISES on EP001, so every persistence call must be fed
# the normalized form — EP001 → 1 → ep_001. Case-insensitive on the EP prefix.
_URN_EPISODE_RE = re.compile(r"^EP(\d+)$", re.IGNORECASE)


def _persistence_episode(urn_episode: str) -> int | None:
    """Map a URN episode token (``EP{NNN}``) to the persistence episode int.

    Returns ``EP001 → 1`` (case-insensitive on the ``EP`` prefix), or ``None`` when the
    input is not a string or is not exactly ``EP\\d+`` (the URN can't address a
    persisted episode → a no-data ``reason``). Normalization is for the persistence
    LOOKUP only — the ORIGINAL URN token is preserved verbatim in every returned
    ``atom_version_urn`` / ``beat_urn``.
    """
    if not isinstance(urn_episode, str):
        return None
    # fullmatch, not match: the pattern's "$" also matches just before a trailing
    # newline, so match() would accept a token such as "EP001" + newline.
    match = _URN_EPISODE_RE.fullmatch(urn_episode)
    return int(match.group(1)) if match else None


def _seq_id_from_scene_file(name: str, episode_int: int) -> str:
    """Recover a scene's seq_id (batch_id) from its flat filename.

    ``list_scenes`` returns ``{token}_{seq_id}.json`` files (``token == ep_NNN``), and
    ``scene_path(project, episode, seq_id)`` rebuilds exactly that name — so stripping the
    ``{token}_`` prefix and ``.json`` suffix yields the seq_id that round-trips through
    ``load_scene_active_with_version``. No body load required.
    """
    prefix = f"ep_{episode_int:03d}_"
    return name[len(prefix):-len(".json")]


def _unique_active_beat(
    project: str, episode_int: int, beat_id: str
) -> tuple[Scene | None, Beat | None]:
    """Locate the one ACTIVE beat carrying ``beat_id`` within an episode.

    Every scene file reported by ``list_scenes`` is loaded through
    ``load_scene_active_with_version`` (the active version, not the batch board) and
    all beats whose ``beat_id`` equals the requested one are gathered.

    Returns ``(scene, beat)`` when exactly one beat matches and ``(None, None)`` when
    none does. More than one match raises ``ValueError``: the URN grammar assumes an
    episode-unique beat id, so an ambiguous id is never resolved to the first hit.
    """

    def _active_scenes():
        # Yield the active-version Scene of each flat scene file in the episode.
        for scene_file in list_scenes(project, episode_int):
            seq_id = _seq_id_from_scene_file(scene_file.name, episode_int)
            active_scene, _ = load_scene_active_with_version(project, episode_int, seq_id)
            yield active_scene

    hits: list[tuple[Scene, Beat]] = [
        (active_scene, candidate)
        for active_scene in _active_scenes()
        for candidate in active_scene.beats
        if candidate.beat_id == beat_id
    ]
    if not hits:
        return None, None
    if len(hits) == 1:
        return hits[0]
    raise ValueError(
        f"ambiguous atom URN: beat_id {beat_id!r} is active in {len(hits)} scenes"
    )


def _find_take(beat: Beat, take_index: int) -> Take | None:
    for take in beat.takes:
        if take.take_index == take_index:
            return take
    return None


def _safe_artifact_relpath(project_root: Path, output_path: str) -> str | None:
    """Resolve a receipt ``output_path`` to a SAFE project-relative artifact string.

    Live StepRunner returns project-RELATIVE ``rel_path`` strings on success, but a bare
    ``_project_relative`` drops an already-relative path (``relative_to`` fails) — so this
    helper keeps a safe relative path AS-IS and only converts absolute-under-root paths:
      (a) relative + safe (no ``..``) → returned unchanged;
      (b) absolute + under ``project_root`` → made relative via ``_project_relative``;
      (c) absolute outside root, or any ``..``/escape → ``None`` (never leak an
          out-of-root path).
    """
    if not isinstance(output_path, str) or not output_path:
        return None
    path = Path(output_path)
    if path.is_absolute():
        # (b) under root → relative; (c) outside root or contains '..' → None.
        return _project_relative(project_root, path)
    if ".." in path.parts:
        return None  # (c) a relative path that escapes the root.
    return path.as_posix()  # (a) already-relative + safe → unchanged.


def _terminal_artifact(project: str, take: Take) -> str | None:
    """The take's terminal-step artifact as a SAFE project-relative path, else ``None``.

    Terminal step = the LAST entry in ``workflow.steps`` whose ``receipt`` is non-None and
    exposes a ``run_result.output_path``. This is the workflow-step artifact (panel/clip),
    NOT ``Beat.board`` (the batch board — forbidden here). ``None`` when no step has a
    receipt/output yet.
    """
    output_path: str | None = None
    for step in take.workflow.steps:
        receipt = getattr(step, "receipt", None)
        if receipt is None:
            continue
        run_result = getattr(receipt, "run_result", None)
        candidate = getattr(run_result, "output_path", None) if run_result is not None else None
        if candidate:
            output_path = candidate  # keep the LAST step that carries an output.
    if not output_path:
        return None
    project_root = ProjectPaths.for_project(project).project_root
    return _safe_artifact_relpath(project_root, output_path)


def _facets_for(scene: Scene, beat: Beat, take: Take) -> dict[str, Any]:
    """Extract the six continuity facets from the REAL live metadata shape.

    Live beats store the shot at ``beat.beat_metadata["shot"] ==
    dataclasses.asdict(CanonicalShot)`` (the EpisodeRunner shape) — NOT flat facet keys.
    Primary source is that shot dict, with a per-facet fallback to ``take.take_metadata``
    then ``take.provenance`` (purely-additive, optional). ``spine_anchor`` is computed
    from the scene/order context the resolver passes in. A facet is the found value, or
    ``None`` when genuinely absent (the contract does NOT require all six to be populated).
    """
    shot = (beat.beat_metadata or {}).get("shot") or {}
    raw = shot.get("raw") or {}
    take_meta = take.take_metadata or {}
    provenance = take.provenance or {}

    def _facet(key: str, primary: Any) -> Any:
        if primary is not None:
            return primary
        for source in (take_meta, provenance):
            value = source.get(key)
            if value is not None:
                return value
        return None

    characters = shot.get("characters") or []
    identity = [c.get("char_id") for c in characters
                if isinstance(c, dict) and c.get("char_id")] or None
    # wardrobe is a PROMOTED per-character field on CanonicalShot.characters[]
    # (char_id -> wardrobe_phase_id), NOT a flat raw key.
    wardrobe = {c["char_id"]: c["wardrobe_phase_id"] for c in characters
                if isinstance(c, dict) and c.get("char_id") and c.get("wardrobe_phase_id")} or None
    # action / props live in the ORIGINAL nested plan-shot dict (raw), NOT flat keys:
    # action ← prompt_data.kinetic_action ; props ← asset_data.props / prop_interaction.
    raw_prompt = raw.get("prompt_data") or {}
    raw_asset = raw.get("asset_data") or {}
    action_primary = raw_prompt.get("kinetic_action") or raw.get("action") or raw.get("description")
    props_primary = raw_asset.get("props") or None
    if props_primary is None:
        pi = raw_asset.get("prop_interaction")
        props_primary = pi if pi not in (None, "", "none") else None

    # shot_type is the LAST resort for action — only after take_metadata/provenance.
    action = _facet("action", action_primary)
    if action is None:
        action = shot.get("shot_type")

    return {
        "identity": _facet("identity", identity),
        "action": action,
        "wardrobe": _facet("wardrobe", wardrobe),
        "props": _facet("props", props_primary),
        "location": _facet("location", shot.get("location_id")),
        "spine_anchor": {
            "shot_id": shot.get("shot_id"),
            "scene_id": scene.scene_id,
            "beat_index": scene.beats.index(beat),
        },
    }


def resolve_atom_version(atom_version_urn: str, project: str) -> AtomVersionModel:
    """Resolve an atom-version URN to its artifact, continuity facets and canon flag.

    Read-only projection over the ACTIVE persisted scene tree; the batch board is not
    consulted. Outcomes:

    * a URN that ``parse_atom_version_urn`` rejects → whatever that parser raises;
    * an episode token that does not map to a persisted episode, an unknown beat, or an
      unknown take index → a model carrying only the addressing fields plus a
      non-empty ``reason``;
    * a ``beat_id`` active in more than one scene → ``ValueError`` (raised by
      ``_unique_active_beat``);
    * otherwise → the resolved model, with ``is_canon`` true only when the beat has a
      primary take id and it equals this take's id.
    """
    episode, beat_id, take_index = parse_atom_version_urn(atom_version_urn)
    # Fields shared by every outcome; beat_urn keeps the ORIGINAL episode token.
    addressing = {
        "atom_version_urn": atom_version_urn,
        "beat_urn": atom_urn(episode, beat_id),
        "take_index": take_index,
    }

    episode_int = _persistence_episode(episode)
    if episode_int is None:
        return AtomVersionModel(
            **addressing,
            reason=f"episode token {episode!r} is not addressable to a persisted "
            "episode (expected EP<NNN>)",
        )

    scene, beat = _unique_active_beat(project, episode_int, beat_id)
    if beat is None:
        return AtomVersionModel(
            **addressing,
            reason=f"no active beat {beat_id!r} in episode {episode}",
        )

    take = _find_take(beat, take_index)
    if take is None:
        return AtomVersionModel(
            **addressing,
            reason=f"no atom-version @t{take_index} on beat {beat_id!r} in episode {episode}",
        )

    canon_id = beat.primary_take_id
    is_canon = canon_id is not None and take.take_id == canon_id
    return AtomVersionModel(
        **addressing,
        artifact=_terminal_artifact(project, take),
        facets=_facets_for(scene, beat, take),
        is_canon=is_canon,
    )


def get_atom(beat_urn: str, project: str) -> AtomModel:
    """Resolve a beat-grain atom URN to its ordered atom-versions and canon pointer.

    Read-only projection over the ACTIVE persisted scene tree; the batch board is not
    consulted. The URN is parsed by ``parse_atom_urn`` (whatever it raises propagates).
    A non-addressable episode token or an unknown beat yields a model with a non-empty
    ``reason``; a ``beat_id`` active in several scenes raises ``ValueError`` (from
    ``_unique_active_beat``).

    On success the takes are listed in ascending ``take_index`` order. With a primary
    (canon) take set, ``newer_unpointed_versions`` counts the takes whose index exceeds
    the canon take's; without one, the canon fields are ``None`` and the count is 0.
    """
    episode, beat_id = parse_atom_urn(beat_urn)
    episode_int = _persistence_episode(episode)
    if episode_int is None:
        return AtomModel(
            beat_urn=beat_urn,
            reason=f"episode token {episode!r} is not addressable to a persisted "
            "episode (expected EP<NNN>)",
        )

    _, beat = _unique_active_beat(project, episode_int, beat_id)
    if beat is None:
        return AtomModel(
            beat_urn=beat_urn,
            reason=f"no active beat {beat_id!r} in episode {episode}",
        )

    take_indices = sorted(take.take_index for take in beat.takes)
    version_urns = [build_atom_version_urn(episode, beat_id, index) for index in take_indices]

    canon = beat.primary_take
    if canon is None:
        canon_take_index: int | None = None
        canon_urn: str | None = None
        newer_unpointed = 0
    else:
        canon_take_index = canon.take_index
        canon_urn = build_atom_version_urn(episode, beat_id, canon_take_index)
        newer_unpointed = len([index for index in take_indices if index > canon_take_index])

    return AtomModel(
        beat_urn=beat_urn,
        atom_version_urns=version_urns,
        take_indices=take_indices,
        canon_take_index=canon_take_index,
        canon_urn=canon_urn,
        newer_unpointed_versions=newer_unpointed,
    )


def wardrobe_continuity_check(atom_version_urns: list[str], project: str) -> ContinuityReport:
    """Report characters whose wardrobe phase differs across the RESOLVED atoms.

    Each URN is resolved with ``resolve_atom_version`` (anything it raises propagates).
    An atom that comes back with a non-empty ``reason`` is listed under ``unresolved``
    and left out of the comparison, so missing data is never read as "no wardrobe".
    For the remaining atoms the ``wardrobe`` facet (char_id → phase) is grouped per
    character; a character with more than one distinct phase becomes a
    ``WardrobeMismatch``. ``checked`` echoes the input URNs.
    """
    phases_by_char: dict[str, dict[str, str]] = {}  # char_id -> {urn -> phase}
    skipped: list[str] = []

    for urn in atom_version_urns:
        resolved = resolve_atom_version(urn, project)
        if resolved.reason:
            # No-data atom: kept visible in the report, excluded from the check.
            skipped.append(urn)
            continue
        facets = resolved.facets or {}
        for char_id, phase in (facets.get("wardrobe") or {}).items():
            per_atom = phases_by_char.get(char_id)
            if per_atom is None:
                per_atom = phases_by_char[char_id] = {}
            per_atom[urn] = phase

    mismatches = []
    for char_id, per_atom in phases_by_char.items():
        distinct_phases = set(per_atom.values())
        if len(distinct_phases) > 1:
            mismatches.append(WardrobeMismatch(char_id=char_id, phases_by_atom=per_atom))

    return ContinuityReport(
        checked=list(atom_version_urns),
        wardrobe_mismatches=mismatches,
        unresolved=skipped,
    )


# REC-235 Phase 3: index-served cross-batch composition assembly (atom_read_model).
# Thin builders over the Phase 1 CompositionManifest shape — members may be atom-version
# URNs drawn from DIFFERENT batches/episodes (the manifest + render_composition_view are
# already batch-independent: they point at atom URNs, resolved on demand). Pure assembly:
# NO resolution, NO generation, NO writes. Rendered by board_builder.render_composition_view
# (ZERO generation). kind is always "CUT" for these builders.
def build_cut_composition(
    atom_version_urns: list[str], layout: dict[str, Any] | None = None
) -> CompositionManifest:
    """Assemble a ``CUT`` composition manifest from atom-version URNs.

    The URNs are copied into a fresh list and a missing or empty ``layout`` becomes
    ``{}``. Nothing is resolved or generated here: the members are handed straight to
    ``CompositionManifest``, so any validation (and any error for a bad URN) comes from
    that constructor.
    """
    members = list(atom_version_urns)
    effective_layout = layout if layout else {}
    return CompositionManifest(kind="CUT", members=members, layout=effective_layout)


def build_pairwise_view(urn_a: str, urn_b: str) -> CompositionManifest:
    """Build a two-member CUT composition placing atom A next to atom B."""
    pair = [urn_a, urn_b]
    return build_cut_composition(pair)
