"""Drift validator for recoil/architecture/topology/nodes/_render_overlay.yaml —
the Atlas RENDER OVERLAY (migrated from the retired recoil/docs/pipeline_graph.yaml).

Anti-rot tie (SCOPED — honest limit): fails when the overlay diverges from the
modality registry or from its own internal invariants — so a *modality-level*
engine change surfaces RED. It does NOT auto-validate every node/edge/gate/fan-in
against the orchestration code: no such auto-derivation exists, and the overlay is
a hand-curated 4-agent audit projection — the SAME trust model the original
pipeline_graph.yaml always had, and the same as topology's authored .topo.yaml
nodes. Node-level fidelity is maintained by curation + provenance refs (surfaced
for human audit), not by an automated gate. Claiming more would be dishonest.
Provenance (path:line in node `tool`/`evidence`) is resolved best-effort; missing
paths are reported, not asserted (pre-existing refs include line-ranges/evidence
that the build does not own). Run build_atlas_graph.py --strict to hard-fail them.
"""
from __future__ import annotations

import re
from pathlib import Path

import pytest

# Skip this whole test module (instead of erroring at import) when PyYAML is
# not installed.
yaml = pytest.importorskip("yaml")

# Filesystem anchors, all derived from this file's own resolved location.
_TOOLS = Path(__file__).resolve().parent                 # …/topology/tools
_TOPO = _TOOLS.parent                                    # …/topology
_MONO = _TOPO.parents[2]                                 # monorepo root (architecture→recoil→root)
# The overlay file under validation.
_OVERLAY = _TOPO / "nodes" / "_render_overlay.yaml"
# Source file scanned textually for MODALITY_* constants.
_REGISTRY = _MONO / "recoil" / "pipeline" / "core" / "registry.py"

# Allowed values for a node's "node_type" field.
_NODE_TYPES = {"input", "generation", "gate", "human_review", "decision", "store"}
# Allowed values for a node's "status" field.
_STATUSES = {"live", "substrate_only", "deprecated"}
# Names accepted as edge endpoints in addition to the overlay's own node ids.
_SENTINELS = {"external", "ssot_layer", "workflow_step", "finish_compile"}
# Matches "<path>.<ext>:<digits>" provenance refs for the listed extensions;
# group 1 is the path, group 2 the line number.
_REF_RE = re.compile(r"([A-Za-z0-9_./-]+\.(?:py|md|json|yaml|yml|sh|html)):(\d+)")


@pytest.fixture(scope="module")
def graph():
    """Load the render overlay once per test module and return it as a dict.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the machine's locale encoding. The fixture fails fast, with a readable
    message, when the overlay file is missing or when its top level is not a
    mapping (an empty file makes ``safe_load`` return ``None``).
    """
    assert _OVERLAY.exists(), f"missing render overlay: {_OVERLAY}"
    data = yaml.safe_load(_OVERLAY.read_text(encoding="utf-8"))
    assert isinstance(data, dict), (
        f"render overlay must be a YAML mapping at top level, "
        f"got {type(data).__name__}: {_OVERLAY}"
    )
    return data


def test_parses_and_has_sections(graph):
    """The overlay must expose all four expected top-level sections.

    On failure the message names the first missing section, in the order the
    sections are listed below.
    """
    required = ("meta", "nodes", "edges", "director_note_routes")
    absent = [section for section in required if section not in graph]
    assert not absent, f"overlay missing top-level '{absent[0]}'"


def test_meta_counts_match(graph):
    """The counts declared under ``meta`` must equal the real section lengths."""
    # (counter key inside meta, section it counts); nodes are checked first.
    pairs = (("node_count", "nodes"), ("edge_count", "edges"))
    for counter, section in pairs:
        assert graph["meta"][counter] == len(graph[section])


def test_node_shape(graph):
    """Check that node ids are unique and every node is well-formed.

    For each node: ``node_type`` must be in ``_NODE_TYPES``, ``status`` must be
    in ``_STATUSES``, and a node whose type is ``gate`` must carry a truthy
    ``gate`` value. A duplicate-id failure lists the offending ids so the
    overlay can be fixed without a manual search.
    """
    from collections import Counter  # local import keeps this block standalone

    nodes = graph["nodes"]
    id_counts = Counter(node["id"] for node in nodes)
    # Counter preserves first-seen order, so duplicates are reported in file order.
    duplicates = [nid for nid, count in id_counts.items() if count > 1]
    assert not duplicates, f"duplicate node ids: {duplicates}"

    for node in nodes:
        nid = node["id"]
        kind = node["node_type"]
        assert kind in _NODE_TYPES, f"{nid}: bad node_type {kind}"
        status = node["status"]
        assert status in _STATUSES, f"{nid}: bad status {status}"
        if kind == "gate":
            assert node.get("gate"), f"{nid}: gate node must carry a non-null gate"


def test_no_dangling_edges(graph):
    """Both endpoints of every edge must be a known node id or a sentinel."""
    known = {node["id"] for node in graph["nodes"]}.union(_SENTINELS)
    for edge in graph["edges"]:
        # "from" is validated before "to", matching the edge's own direction.
        for end in ("from", "to"):
            endpoint = edge[end]
            assert endpoint in known, f"edge {end} unknown node: {endpoint}"


def test_meta_substrate_and_human_lists_match(graph):
    """The id lists declared in ``meta`` must match what the nodes say.

    ``meta.substrate_only_nodes`` is compared with the ids of nodes whose
    status is ``substrate_only``; ``meta.human_review_nodes`` with the ids of
    nodes whose node_type is ``human_review``. Order is ignored on both sides.
    """

    def ids_where(field, wanted):
        # Sorted ids of every node whose `field` equals `wanted`.
        return sorted(
            node["id"] for node in graph["nodes"] if node[field] == wanted
        )

    expected_substrate = ids_where("status", "substrate_only")
    expected_humans = ids_where("node_type", "human_review")

    assert sorted(graph["meta"]["substrate_only_nodes"]) == expected_substrate
    assert sorted(graph["meta"]["human_review_nodes"]) == expected_humans


def _registry_modalities() -> set[str]:
    """Return the string values of the MODALITY_* constants in registry.py.

    The registry is scanned as text, not imported. Only assignments that start
    at the beginning of a line, with an upper-case ``MODALITY_`` name and a
    double-quoted value made of lower-case letters, digits and underscores,
    are recognised; anything written differently is not picked up.

    The file is decoded explicitly as UTF-8 so the scan does not depend on the
    machine's locale encoding.
    """
    text = _REGISTRY.read_text(encoding="utf-8")
    return set(re.findall(r'^MODALITY_[A-Z0-9_]+\s*=\s*"([a-z0-9_]+)"', text, re.M))


def test_modalities_cover_registry(graph):
    """Overlay modality nodes and registry.py modality strings must agree.

    Each ``modality_<name>`` node id covers the registry string ``<name>``.
    Two ids are special: ``modality_registry`` is ignored, and
    ``modality_eval`` covers the three ``eval_*`` strings at once. The test
    fails both when the registry has a modality with no node and when a node
    names a modality the registry does not define.
    """
    registry = _registry_modalities()
    assert registry, "parsed zero MODALITY_* constants — update parser or registry path"

    prefix = "modality_"
    eval_group = {"eval_image_v1", "eval_video_v1", "eval_audio_v1"}

    covered = set()
    node_ids = {node["id"] for node in graph["nodes"]}
    for nid in node_ids:
        is_modality_node = nid.startswith(prefix) and nid != "modality_registry"
        if not is_modality_node:
            continue
        # modality_eval is an umbrella node; every other id maps one-to-one.
        covered.update(eval_group if nid == "modality_eval" else {nid[len(prefix):]})

    absent = registry - covered
    unknown = covered - registry
    assert not absent, f"registry modalities absent from overlay: {sorted(absent)}"
    assert not unknown, f"overlay modalities not in registry.py: {sorted(unknown)}"


def test_provenance_refs_are_wellformed(graph):
    """Scan node ``tool``/``evidence`` strings for path:line provenance refs.

    Hard failures: a ref whose path contains ``..``, or no refs found at all.
    A path that does not exist under the monorepo root is only printed as a
    warning, never asserted — pre-existing evidence refs include line-ranges/
    uncertain anchors the build does not own; build_atlas_graph.py --strict
    hard-fails them.
    """
    ref_count = 0
    dangling = []
    for node in graph["nodes"]:
        # Lazy generator: "tool" is fetched and processed before "evidence".
        texts = (node.get(name) for name in ("tool", "evidence"))
        for text in texts:
            if not isinstance(text, str):
                continue
            for match in _REF_RE.finditer(text):
                rel_path = match.group(1)
                ref_count += 1
                assert ".." not in rel_path, (
                    f"{node['id']}: suspicious provenance path {rel_path}"
                )
                if (_MONO / rel_path).exists():
                    continue
                dangling.append(f"{node['id']}:{rel_path}")

    if dangling:
        # Reported, not asserted (warn-level): the count covers every
        # occurrence, the sample shows at most eight distinct entries.
        sample = sorted(set(dangling))[:8]
        print(f"WARN: {len(dangling)} unresolved provenance path(s): {sample}")

    assert ref_count > 0, "parsed zero provenance refs — overlay tool/evidence fields empty?"