"""Coverage validation for the breakdown layer mention ledger.

The existing Stage-1 "Breakdown Pass" in ``ingest_pipeline`` writes
``global_bible.json``. This module belongs to the newer breakdown layer:
mention ledger coverage validation plus frozen operator reports for Gate A.

This is intentionally NOT wired into ``dispatch_payload`` / StepRunner
pre-spend in this build. That belongs to the later skeleton-tag regression
gate; running these R-rules on every video dispatch now would fire before the
needed breakdown ledger/skeleton context exists.
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

from recoil.pipeline._lib.prose_validator import Severity


@dataclass
class CoverageFinding:
    """Structured coverage result consumed directly by the report writer.

    Instances are created by ``_append``; ``scene_id`` and ``evidence`` are
    derived there from the mention so the report writer does not have to dig
    into the mention dict again.
    """

    # Severity level; this module emits BLOCK, WARN and INFO.
    severity: Severity
    # Rule identifier such as "coverage_r1" ... "coverage_r9", or
    # "coverage_tombstoned" for a BLOCK suppressed by a tombstone.
    check: str
    # Human-readable description of the problem.
    message: str
    # The mention dict the finding is about (for R9 phantoms, a mention
    # synthesised from the bible's wardrobe event).
    mention: dict
    # String form of the mention's "scene_id" ("" when absent).
    scene_id: str
    # The mention's "span_quote", else its "evidence", else "".
    evidence: str


# Per-kind id fields, listed in the exact order ``mention_key`` joins them
# after the kind itself. Kinds missing from this table yield a key made of the
# kind alone.
MENTION_KEY_FIELDS: dict[str, tuple[str, ...]] = {
    "character": ("character_id",),
    "location": ("location_id",),
    "sublocation": ("location_id", "sublocation"),
    "prop": ("prop_id",),
    "prop_state": ("prop_id", "state_id"),
    "wardrobe_change": ("character_id", "piece", "change"),
    "transient_state": ("character_id", "state_desc"),
    "identity_observation": ("character_id", "attribute", "observed_value"),
}


def mention_key(mention: dict) -> str:
    """Build the canonical tombstone key identifying a mention.

    The key starts with the mention's ``kind`` and is followed by that kind's
    id fields from ``MENTION_KEY_FIELDS``, all joined with ``|``. The field
    order is fixed per kind:

    * character: character_id
    * location: location_id
    * sublocation: location_id, sublocation
    * prop: prop_id
    * prop_state: prop_id, state_id
    * wardrobe_change: character_id, piece, change
    * transient_state: character_id, state_desc
    * identity_observation: character_id, attribute, observed_value

    Missing values (including a missing ``kind``) contribute an empty string;
    an unknown kind produces a key consisting of the kind only.
    """

    kind = str(mention.get("kind", ""))
    segments = [kind]
    for field_name in MENTION_KEY_FIELDS.get(kind, ()):
        segments.append(str(mention.get(field_name, "")))
    return "|".join(segments)


def verify_coverage(
    ledger: dict,
    bible: dict,
    *,
    registry_lookup: Any = None,
    tombstones: list[dict] | tuple[dict, ...] = (),
) -> list[CoverageFinding]:
    """Validate ledger mentions against bible coverage rules R1-R9.

    Each mention under ``ledger["scenes"][*]["mentions"]`` is checked against
    the rule for its ``kind``; mentions of any other kind are skipped. Once
    the per-mention pass is done, the R9 wardrobe bijection check runs over
    all mentions, scoped to the episode prefixes of the ledger's scene ids.

    Args:
        ledger: Mention ledger with a ``scenes`` list.
        bible: Bible dict; ``characters``, ``locations`` and ``props`` are
            read (non-dict values are treated as empty).
        registry_lookup: Accepted for the future proposal/gate layer but not
            read here: Phase 4's R3 source of truth is
            ``bible.locations[id].sublocations``.
        tombstones: Tombstone dicts. A BLOCK whose mention matches one is
            recorded as an INFO ``coverage_tombstoned`` finding instead.

    Returns:
        Findings in the order they were produced (per-mention rules first,
        then R9).
    """

    _ = registry_lookup
    results: list[CoverageFinding] = []
    characters = _dict(bible.get("characters"))
    locations = _dict(bible.get("locations"))
    props = _dict(bible.get("props"))
    location_aliases = _location_aliases(locations)
    # _iter_mentions already returns a fresh list; no extra copy is needed.
    mentions = _iter_mentions(ledger)

    def block(check: str, message: str, mention: dict) -> None:
        # All inline rules (R1-R4) report at BLOCK severity.
        _append(results, Severity.BLOCK, check, message, mention, tombstones)

    for mention in mentions:
        kind = str(mention.get("kind", ""))
        if kind == "character":
            char_id = str(mention.get("character_id", ""))
            if char_id not in characters:
                block(
                    "coverage_r1",
                    f"character {char_id!r} is not declared in bible.characters",
                    mention,
                )

        elif kind == "location":
            location_id = str(mention.get("location_id", ""))
            if _resolve_location_id(location_id, locations, location_aliases) is None:
                block(
                    "coverage_r2",
                    f"location {location_id!r} is not declared in bible.locations",
                    mention,
                )

        elif kind == "sublocation":
            raw_location_id = str(mention.get("location_id", ""))
            location_id = _resolve_location_id(raw_location_id, locations, location_aliases)
            sublocation = str(mention.get("sublocation", ""))
            if location_id is None:
                block(
                    "coverage_r3",
                    f"sublocation {sublocation!r} cannot resolve unknown location {raw_location_id!r}",
                    mention,
                )
            elif sublocation not in _dict(_dict(locations[location_id]).get("sublocations")):
                block(
                    "coverage_r3",
                    f"sublocation {sublocation!r} is not declared under location {location_id!r}",
                    mention,
                )

        elif kind == "prop":
            prop_id = str(mention.get("prop_id", ""))
            if prop_id not in props:
                block(
                    "coverage_r4",
                    f"prop {prop_id!r} is not declared in bible.props",
                    mention,
                )

        elif kind == "prop_state":
            _validate_prop_state(results, mention, props, tombstones)

        elif kind == "wardrobe_change":
            _validate_wardrobe_change(results, mention, characters, tombstones)

        elif kind == "transient_state":
            _validate_transient_state(results, mention, characters, tombstones)

        elif kind == "identity_observation":
            _validate_identity_observation(results, mention, characters, tombstones)

    # Episode prefixes come from the ledger's scenes (not its mentions) so
    # that R9 scoping still holds for an episode with no mentions at all.
    ledger_scene_prefixes = {
        str(scene.get("scene_id", "")).split("_")[0]
        for scene in (ledger.get("scenes") or [])
        if isinstance(scene, dict) and str(scene.get("scene_id", ""))
    }
    _validate_wardrobe_bijection(
        results, mentions, characters, tombstones,
        scene_prefixes=ledger_scene_prefixes,
    )
    return results


def write_coverage_report(results: list[CoverageFinding], ledger: dict, dest_dir: Path) -> Path:
    """Write a frozen markdown coverage report and return its path.

    The report holds a summary header (project, episode, scene/mention counts
    and per-severity totals) followed by tables for BLOCKs, WARNs and
    tombstoned INFOs.

    Args:
        results: Findings to report.
        ledger: The ledger the findings came from; only used for the header.
        dest_dir: Directory to write into; created if missing.

    Returns:
        Path of the newly created ``coverage_report_<timestamp>[-N].md`` file.
    """

    dest_dir = Path(dest_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)

    blocks = [r for r in results if r.severity is Severity.BLOCK]
    warns = [r for r in results if r.severity is Severity.WARN]
    infos = [r for r in results if r.severity is Severity.INFO]
    mention_count = sum(
        len(scene.get("mentions") or [])
        for scene in ledger.get("scenes") or []
        if isinstance(scene, dict)
    )
    lines = [
        "# Coverage Report",
        "",
        f"- Project: {_md(str(ledger.get('project', '')))}",
        f"- Episode: {_md(str(ledger.get('episode', '')))}",
        f"- Scenes: {len(ledger.get('scenes') or [])}",
        f"- Mentions: {mention_count}",
        f"- BLOCK: {len(blocks)}",
        f"- WARN: {len(warns)}",
        f"- INFO: {len(infos)}",
        "",
    ]

    lines.extend(_findings_table("## BLOCKs", blocks))
    lines.extend(_findings_table("## WARNs", warns))
    tombstoned = [r for r in infos if r.check == "coverage_tombstoned"]
    lines.extend(_findings_table("## Tombstoned INFOs", tombstoned))
    body = "\n".join(lines) + "\n"

    stamp = datetime.now().strftime("%Y%m%d-%H%M%S-%f")
    path = dest_dir / f"coverage_report_{stamp}.md"
    suffix = 2
    while True:
        try:
            # Exclusive create: an existing report is never overwritten, even
            # if another writer claims the same name between a check and the
            # write (the former exists()-then-write_text sequence could race).
            with path.open("x", encoding="utf-8") as handle:
                handle.write(body)
        except FileExistsError:
            path = dest_dir / f"coverage_report_{stamp}-{suffix}.md"
            suffix += 1
        else:
            return path


def _validate_prop_state(
    results: list[CoverageFinding],
    mention: dict,
    props: dict,
    tombstones: list[dict],
) -> None:
    """Apply rule R5 to a ``prop_state`` mention.

    At most one ``coverage_r5`` finding is appended, for the first failing
    condition in this order: unknown prop (BLOCK), prop without a non-empty
    ``states`` dict (WARN), undeclared state (BLOCK), state not reachable by
    the mention's scene (BLOCK).
    """
    prop_id = str(mention.get("prop_id", ""))
    state_id = str(mention.get("state_id", ""))
    prop = _dict(props.get(prop_id))
    states = prop.get("states")

    if not prop:
        severity = Severity.BLOCK
        message = f"prop_state references unknown prop {prop_id!r}"
    elif not isinstance(states, dict) or not states:
        severity = Severity.WARN
        message = f"prop {prop_id!r} has no state machine declared"
    elif state_id not in states:
        severity = Severity.BLOCK
        message = f"prop state {prop_id!r}.{state_id!r} is not declared in states"
    elif not _prop_state_reachable_by_scene(prop, state_id, str(mention.get("scene_id", ""))):
        severity = Severity.BLOCK
        message = f"prop state {prop_id!r}.{state_id!r} is not reachable by scene {mention.get('scene_id')!r}"
    else:
        # Declared and reachable: nothing to report.
        return

    _append(results, severity, "coverage_r5", message, mention, tombstones)


def _validate_wardrobe_change(
    results: list[CoverageFinding],
    mention: dict,
    characters: dict,
    tombstones: list[dict],
) -> None:
    """Apply rule R6 to a ``wardrobe_change`` mention.

    Appends a ``coverage_r6`` BLOCK when the character is unknown, or when
    none of the character's wardrobe events both sits at or before the
    mention's scene and matches its piece and change.
    """
    char_id = str(mention.get("character_id", ""))
    character = _dict(characters.get(char_id))

    if not character:
        message = f"wardrobe change references unknown character {char_id!r}"
    else:
        scene_id = str(mention.get("scene_id", ""))
        has_trigger = any(
            _scene_ref_lte(event["scene_id"], scene_id)
            and _wardrobe_event_matches_mention(event, mention)
            for event in _wardrobe_events_for_character(char_id, character)
        )
        if has_trigger:
            return
        message = f"wardrobe change {mention_key(mention)!r} has no matching/prior phase trigger"

    _append(results, Severity.BLOCK, "coverage_r6", message, mention, tombstones)


def _validate_transient_state(
    results: list[CoverageFinding],
    mention: dict,
    characters: dict,
    tombstones: list[dict],
) -> None:
    """Apply rule R7 to a ``transient_state`` mention.

    Appends a ``coverage_r7`` WARN when the character is unknown or the
    mention's ``state_desc`` cannot be resolved on that character.
    """
    char_id = str(mention.get("character_id", ""))
    state_desc = str(mention.get("state_desc", ""))
    character = _dict(characters.get(char_id))

    if character and _transient_resolved(character, state_desc):
        return

    _append(
        results,
        Severity.WARN,
        "coverage_r7",
        f"transient state {state_desc!r} is not declared on character {char_id!r}",
        mention,
        tombstones,
    )


def _validate_identity_observation(
    results: list[CoverageFinding],
    mention: dict,
    characters: dict,
    tombstones: list[dict],
) -> None:
    """Apply rule R8 to an ``identity_observation`` mention.

    Nothing is reported unless the character declares an
    ``identity_invariants`` dict containing the observed attribute. When it
    does and the observed value differs from the invariant, a ``coverage_r8``
    BLOCK is appended.
    """
    char_id = str(mention.get("character_id", ""))
    # An unknown character collapses to {} and so has no invariants.
    invariants = _dict(characters.get(char_id)).get("identity_invariants")
    if not isinstance(invariants, dict):
        return

    attribute = str(mention.get("attribute", ""))
    if attribute not in invariants:
        return

    expected = invariants[attribute]
    observed = mention.get("observed_value")
    if _same_value(expected, observed):
        return

    _append(
        results,
        Severity.BLOCK,
        "coverage_r8",
        f"identity observation {char_id!r}.{attribute}={observed!r} contradicts invariant {expected!r}",
        mention,
        tombstones,
    )


def _validate_wardrobe_bijection(
    results: list[CoverageFinding],
    mentions: list[dict],
    characters: dict,
    tombstones: list[dict],
    scene_prefixes: set[str] | None = None,
) -> None:
    """Apply rule R9: wardrobe events and wardrobe mentions must pair up.

    Emits a ``coverage_r9`` BLOCK for each in-scope bible wardrobe event that
    no ledger mention covers ("phantom"), then one for each wardrobe mention
    that no in-scope event covers ("unhandled").
    """

    def episode_of(scene_ref: Any) -> str:
        # Episode prefix is whatever precedes the first underscore.
        return str(scene_ref).split("_")[0]

    wardrobe_mentions = [
        candidate
        for candidate in mentions
        if str(candidate.get("kind", "")) == "wardrobe_change"
    ]
    events = [
        event
        for char_id, character in characters.items()
        for event in _wardrobe_events_for_character(str(char_id), _dict(character))
    ]

    # Episode scoping: the ledger is per-episode, so a trigger anchored in a
    # different episode's scene owes this run nothing. Prefer the prefixes the
    # caller derived from the ledger scenes; fall back to the mentions' own
    # scene ids only when none were supplied.
    episode_prefixes = set(scene_prefixes or ())
    if not episode_prefixes:
        episode_prefixes = {
            episode_of(candidate.get("scene_id", ""))
            for candidate in mentions
            if str(candidate.get("scene_id", ""))
        }
    if episode_prefixes:
        events = [
            event
            for event in events
            if episode_of(event.get("scene_id", "")) in episode_prefixes
        ]

    # Direction 1: every event needs at least one covering mention.
    for event in events:
        if any(
            _wardrobe_event_applies_to_mention(event, candidate)
            for candidate in wardrobe_mentions
        ):
            continue
        phantom = _mention_from_wardrobe_event(event)
        _append(
            results,
            Severity.BLOCK,
            "coverage_r9",
            f"phantom wardrobe trigger {mention_key(phantom)!r} has no ledger mention",
            phantom,
            tombstones,
        )

    # Direction 2: every wardrobe mention needs at least one covering event.
    for candidate in wardrobe_mentions:
        if any(_wardrobe_event_applies_to_mention(event, candidate) for event in events):
            continue
        _append(
            results,
            Severity.BLOCK,
            "coverage_r9",
            f"unhandled wardrobe change {mention_key(candidate)!r} has no phase trigger",
            candidate,
            tombstones,
        )


def _wardrobe_event_applies_to_mention(event: dict, mention: dict) -> bool:
    """Return whether a bible wardrobe event covers a ledger mention (R9)."""
    if str(mention.get("character_id", "")) != event["character_id"]:
        return False

    mention_scene = str(mention.get("scene_id", ""))
    if event.get("surrogate"):
        # A phase-level surrogate has no single nameable piece, so it can only
        # be paired by exact trigger scene. An at-or-after comparison would
        # let it swallow later, unrelated mentions.
        return mention_scene == event["scene_id"]

    # Real-piece events need the mention at or after the trigger scene AND
    # piece/change equivalence; looser same-scene matching would hide true
    # phantom or unhandled R9 failures.
    if not _scene_ref_lte(event["scene_id"], mention_scene):
        return False
    return _wardrobe_event_matches_mention(event, mention)


def _prop_state_reachable_by_scene(prop: dict, target_state: str, scene_id: str) -> bool:
    initial = prop.get("initial_state")
    states = prop.get("states")
    if not isinstance(initial, str) or not isinstance(states, dict) or initial not in states:
        return False

    reachable = {initial}
    changed = True
    transitions = prop.get("transitions") if isinstance(prop.get("transitions"), list) else []
    while changed:
        changed = False
        for transition in transitions:
            if not isinstance(transition, dict):
                continue
            trigger_scene = transition.get("trigger_scene")
            if isinstance(trigger_scene, str) and not _scene_ref_lte(trigger_scene, scene_id):
                continue
            frm = transition.get("from")
            to = transition.get("to")
            if isinstance(frm, str) and isinstance(to, str) and frm in reachable and to not in reachable:
                reachable.add(to)
                changed = True
            if transition.get("reversible") is True and isinstance(frm, str) and isinstance(to, str):
                if to in reachable and frm not in reachable:
                    reachable.add(frm)
                    changed = True
    return target_state in reachable


def _wardrobe_events_for_character(char_id: str, character: dict) -> list[dict]:
    events: list[dict] = []
    phases = character.get("phases") if isinstance(character.get("phases"), list) else []
    for phase in phases:
        if not isinstance(phase, dict):
            continue
        trigger = phase.get("trigger")
        if not isinstance(trigger, dict) or not isinstance(trigger.get("scene_ref"), str):
            continue
        # The phase TRIGGER is the wardrobe-change event (one per phase).
        # appearance.wardrobe is the phase's FULL appearance state — carry-
        # forward "worn" items are not deltas, and emitting an event per item
        # creates phantom coverage_r9 BLOCKs on valid bible content. The
        # event's piece is the phase's salient changed piece when exactly one
        # non-carry-forward item exists; otherwise the phase_id stands in.
        appearance = phase.get("appearance")
        wardrobe = _dict(appearance).get("wardrobe") if isinstance(appearance, dict) else []
        items = [i for i in (wardrobe if isinstance(wardrobe, list) else []) if isinstance(i, dict)]
        if not items:
            # No wardrobe content at all (e.g. a freshly drafted stub phase)
            # — nothing changed, no event owed.
            continue
        changed_items = [
            i for i in items
            if str(i.get("state", "")) in ("removed", "damaged", "torn") and str(i.get("piece", ""))
        ]
        surrogate = False
        if len(changed_items) == 1:
            piece = str(changed_items[0]["piece"])
            change = _state_to_change(str(changed_items[0]["state"]))
        else:
            # Donned changes are represented by L9 state "worn" — detect the
            # newly-donned piece as the salient worn item the trigger
            # description names. Multiple/zero candidates degrade to a
            # phase-level surrogate event (matched by scene, see
            # _wardrobe_event_applies_to_mention).
            desc = str(trigger.get("description", "")).lower()
            donned = [
                i for i in items
                if i.get("salient") is True
                and str(i.get("state", "")) == "worn"
                and str(i.get("piece", ""))
                and str(i["piece"]).lower() in desc
            ]
            if len(donned) == 1:
                piece = str(donned[0]["piece"])
                change = "donned"
            else:
                piece = str(phase.get("phase_id", "")) or "phase"
                change = "changed"
                surrogate = True
        events.append(
            {
                "kind": "wardrobe_change",
                "character_id": char_id,
                "piece": piece,
                "change": change,
                "scene_id": trigger["scene_ref"],
                "span_quote": str(trigger.get("description", "")),
                "surrogate": surrogate,
            }
        )
    return events


def _wardrobe_event_matches_mention(event: dict, mention: dict) -> bool:
    """Return whether event and mention agree on character, piece and change.

    The character id is compared exactly, the piece after text normalisation,
    and the change through the change-alias table.
    """
    if str(mention.get("character_id", "")) != event["character_id"]:
        return False
    if _norm(str(mention.get("piece", ""))) != _norm(event["piece"]):
        return False
    return _change_equivalent(str(mention.get("change", "")), event["change"])


def _mention_from_wardrobe_event(event: dict) -> dict:
    return {
        "kind": "wardrobe_change",
        "surface_text": event["piece"],
        "character_id": event["character_id"],
        "piece": event["piece"],
        "change": event["change"],
        "scene_id": event["scene_id"],
        "span_quote": event.get("span_quote", ""),
    }


def _transient_resolved(character: dict, state_desc: str) -> bool:
    """Return whether ``state_desc`` loosely matches something declared on the character.

    Candidate texts are the character's ``transients`` list entries and, for
    each phase appearance, ``hair_state``, the ``visible_gear`` and
    ``notable_marks`` lists, and every wardrobe item's piece, descriptor and
    state. A description that normalises to empty never resolves.
    """
    needle = _norm(state_desc)
    if not needle:
        return False

    def candidate_texts():
        # Yield every declared text the description may be compared against.
        transients = character.get("transients")
        if isinstance(transients, list):
            for transient in transients:
                yield str(transient)

        phases = character.get("phases")
        if not isinstance(phases, list):
            return
        for phase in phases:
            appearance = phase.get("appearance") if isinstance(phase, dict) else None
            if not isinstance(appearance, dict):
                continue
            if appearance.get("hair_state"):
                yield str(appearance["hair_state"])
            for list_key in ("visible_gear", "notable_marks"):
                entries = appearance.get(list_key)
                if isinstance(entries, list):
                    for entry in entries:
                        yield str(entry)
            wardrobe = appearance.get("wardrobe")
            if isinstance(wardrobe, list):
                for item in wardrobe:
                    if isinstance(item, dict):
                        for item_key in ("piece", "descriptor", "state"):
                            yield str(item.get(item_key, ""))

    return any(_text_matches(needle, text) for text in candidate_texts())


def _append(
    results: list[CoverageFinding],
    severity: Severity,
    check: str,
    message: str,
    mention: dict,
    tombstones: list[dict],
) -> None:
    """Record one finding, honouring tombstones for BLOCKs.

    A BLOCK whose mention matches a tombstone is stored as an INFO
    ``coverage_tombstoned`` finding that names the suppressed check, the
    tombstone's reason and its approver. Every other finding is stored
    unchanged.
    """
    scene_id = str(mention.get("scene_id", ""))
    evidence = str(mention.get("span_quote") or mention.get("evidence") or "")

    # Only BLOCK findings are eligible for tombstone suppression.
    tombstone = None
    if severity is Severity.BLOCK:
        tombstone = _matching_tombstone(mention, scene_id, tombstones)

    if not tombstone:
        results.append(CoverageFinding(severity, check, message, mention, scene_id, evidence))
        return

    reason = tombstone.get("reason", "")
    approver = tombstone.get("approved_by", "")
    note = f"{check} suppressed by tombstone: {reason} (approved_by={approver})".strip()
    results.append(
        CoverageFinding(Severity.INFO, "coverage_tombstoned", note, mention, scene_id, evidence)
    )


def _matching_tombstone(mention: dict, scene_id: str, tombstones: list[dict]) -> dict | None:
    """Return the first tombstone covering this mention in this scene, else ``None``.

    A tombstone covers the mention when its ``mention_key`` equals the
    mention's canonical key and its ``scene_id`` is either ``"*"`` or the
    given scene. Non-dict entries are ignored.
    """
    key = mention_key(mention)
    accepted_scenes = ("*", scene_id)
    return next(
        (
            candidate
            for candidate in tombstones or []
            if isinstance(candidate, dict)
            and candidate.get("mention_key") == key
            and str(candidate.get("scene_id", "")) in accepted_scenes
        ),
        None,
    )


def _iter_mentions(ledger: dict) -> list[dict]:
    mentions: list[dict] = []
    for scene in ledger.get("scenes") or []:
        if not isinstance(scene, dict):
            continue
        scene_id = str(scene.get("scene_id", ""))
        for mention in scene.get("mentions") or []:
            if not isinstance(mention, dict):
                continue
            normalized = dict(mention)
            normalized.setdefault("scene_id", scene_id)
            mentions.append(normalized)
    return mentions


def _dict(value: Any) -> dict:
    return value if isinstance(value, dict) else {}


def _location_aliases(locations: dict) -> dict[str, str]:
    aliases: dict[str, str] = {}
    for location_id, location in locations.items():
        aliases[str(location_id)] = str(location_id)
        for alias in _dict(location).get("aliases") or []:
            aliases[str(alias)] = str(location_id)
    return aliases


def _resolve_location_id(location_id: str, locations: dict, aliases: dict[str, str]) -> str | None:
    if location_id in locations:
        return location_id
    return aliases.get(location_id)


def _scene_ref_lte(left: str, right: str) -> bool:
    """Return whether scene ``left`` is at or before scene ``right``.

    Scenes are ordered by their parsed (episode, scene) numbers. If either
    reference cannot be parsed the answer is ``False``.
    """
    keys = (_scene_sort_key(left), _scene_sort_key(right))
    if None in keys:
        return False
    return keys[0] <= keys[1]


def _scene_sort_key(scene_id: str) -> tuple[int, int] | None:
    if not isinstance(scene_id, str):
        return None
    parts = scene_id.split("_SC", 1)
    if len(parts) != 2 or not parts[0].startswith("EP"):
        return None
    try:
        return int(parts[0][2:]), int(parts[1])
    except ValueError:
        return None


def _same_value(left: Any, right: Any) -> bool:
    if isinstance(left, list):
        return any(_same_value(item, right) for item in left)
    return _norm(str(left)) == _norm(str(right))


def _text_matches(needle_norm: str, text: str) -> bool:
    """Return whether ``text`` loosely matches an already-normalised needle.

    The text is normalised first. A match is a substring relation in either
    direction, and text that normalises to empty never matches.
    """
    haystack = _norm(text)
    if not haystack:
        return False
    return needle_norm in haystack or haystack in needle_norm


def _norm(value: str) -> str:
    return " ".join(str(value).lower().replace("_", " ").replace("-", " ").split())


def _state_to_change(state: str) -> str:
    """Translate a wardrobe item state into a change verb.

    The normalised state "worn" becomes "donned"; any other state is returned
    in its normalised form.
    """
    normalized = _norm(state)
    return "donned" if normalized == "worn" else normalized


def _change_equivalent(left: str, right: str) -> bool:
    """Return whether two change descriptions mean the same wardrobe change.

    Both sides are normalised and then folded through a small alias table
    (for example "puts on" and "worn" both become "donned"). Values not in
    the table are compared in their normalised form.
    """
    aliases = {
        "wearing": "donned",
        "worn": "donned",
        "put on": "donned",
        "puts on": "donned",
        "removed": "removed",
        "removes": "removed",
        "torn": "torn",
        "tears": "torn",
        "damaged": "damaged",
    }

    def canonical(change: str) -> str:
        # Fold a raw change string onto its canonical verb.
        normalized = _norm(change)
        return aliases.get(normalized, normalized)

    return canonical(left) == canonical(right)


def _findings_table(title: str, findings: list[CoverageFinding]) -> list[str]:
    lines = [title, ""]
    if not findings:
        lines.extend(["None.", ""])
        return lines
    lines.extend(
        [
            "| Rule | Mention | Scene | Evidence | Message |",
            "|---|---|---|---|---|",
        ]
    )
    for finding in findings:
        lines.append(
            "| "
            + " | ".join(
                [
                    _md(finding.check),
                    _md(mention_key(finding.mention)),
                    _md(finding.scene_id),
                    _md(finding.evidence),
                    _md(finding.message),
                ]
            )
            + " |"
        )
    lines.append("")
    return lines


def _md(value: str) -> str:
    return str(value).replace("|", "\\|").replace("\n", " ")


# Public API of this module; every underscore-prefixed name above is an
# internal helper.
__all__ = [
    "CoverageFinding",
    "mention_key",
    "verify_coverage",
    "write_coverage_report",
]