#!/usr/bin/env python3
"""Build + drift-check the Atlas render graph (generated/atlas.render.json).

The Atlas viewer renders ONLY this generated artifact. Two SSOTs at different
altitudes, joined at a seam (NOT one structural graph):
  * _render_overlay.yaml      = render/flow SSOT (nodes, edges, gates, inputs,
                                swimlane — the flow structure topology lacks).
  * topology.full.json        = decision/capability SSOT (read only to carry the
                                loop count for reconciliation).
The overlay's node/edge/gate content is carried as-authored; this generator does
NOT validate it against topology (topology has no node-level flow edges). It
validates only the seam: provenance resolution (--strict to hard-fail) and the
overlay's own substrate/human-review meta consistency.

Output is sorted + timestamp-free so --check can diff it reproducibly.

    python3 recoil/architecture/topology/tools/build_atlas_graph.py --write
    python3 recoil/architecture/topology/tools/build_atlas_graph.py --check
    python3 recoil/architecture/topology/tools/build_atlas_graph.py --check --strict
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path

import yaml

# Filesystem anchors, all derived from this file's own location so the tool
# works regardless of the current working directory.
_TOOLS = Path(__file__).resolve().parent  # directory containing this script
_TOPO = _TOOLS.parent  # parent of the tools directory
# Three levels above _TOPO. Provenance refs are resolved relative to this
# directory (see _parse_provenance). With the script living at
# recoil/architecture/topology/tools/ (per the usage lines in the module
# docstring) this is the directory that contains `recoil/`.
_MONO = _TOPO.parents[2]
_OVERLAY = _TOPO / "nodes" / "_render_overlay.yaml"  # render/flow input
_TOPOLOGY = _TOPO / "generated" / "topology.full.json"  # topology input
_OUT = _TOPO / "generated" / "atlas.render.json"  # generated artifact

# node_type -> render shape (the brief's vocabulary)
_SHAPE = {
    "input": "chip",
    "generation": "rect",
    "gate": "diamond",
    "human_review": "diamond_person",
    "decision": "hex",
    "store": "cylinder",
}
# Matches `<path>.<ext>:<line>` with an optional `-<end>` line-range suffix.
# Group 1 is the path (letters, digits, `_ . / -`, ending in one of the listed
# extensions); group 2 is the starting line number. The range end is matched
# but deliberately not captured.
_REF_RE = re.compile(r"([A-Za-z0-9_./-]+\.(?:py|md|json|yaml|yml|sh|html)):(\d+)(?:-\d+)?")


def _parse_provenance(node: dict) -> list[dict]:
    """Extract `path:line` provenance references from an overlay node.

    The node's ``tool`` and ``evidence`` fields are scanned in that order;
    a field that is absent or not a string is skipped. Every match of
    ``_REF_RE`` yields one record, in match order:

    * ``ref``      -- ``"<path>:<line>"`` (any ``-end`` range suffix dropped)
    * ``file``     -- the matched path
    * ``line``     -- the starting line number as an ``int``
    * ``field``    -- which node field the reference came from
    * ``resolved`` -- whether ``_MONO / path`` exists on disk (the line number
      itself is not checked against the file)
    """
    # Lazy generator: each field is read only when the comprehension reaches it.
    candidates = ((name, node.get(name)) for name in ("tool", "evidence"))
    return [
        {
            "ref": f"{rel_path}:{line_no}",
            "file": rel_path,
            "line": int(line_no),
            "field": name,
            "resolved": (_MONO / rel_path).exists(),
        }
        for name, text in candidates
        if isinstance(text, str)
        for rel_path, line_no in _REF_RE.findall(text)
    ]


def build_render(overlay: dict, topology: dict) -> dict:
    """Assemble the render graph from the overlay and the topology.

    Args:
        overlay: Parsed ``_render_overlay.yaml``. Must provide ``nodes`` (each
            with ``id``, ``node_type`` and ``status``), ``edges`` (each with
            ``from`` and ``to``) and ``meta`` (``node_count``, ``edge_count``,
            ``substrate_only_nodes``, ``human_review_nodes``).
            ``director_note_routes`` is optional.
        topology: Parsed ``topology.full.json``. Only ``nodes.loops`` is read,
            and only to count distinct loop ids.

    Returns:
        A JSON-serialisable dict with ``version``, ``meta``, ``nodes``,
        ``edges`` and ``director_note_routes``. Overlay content is carried
        as-authored; counts in ``meta`` are copied from the overlay, not
        recomputed (``_structural_errors`` reconciles them later).

    Raises:
        KeyError: If a required overlay key is missing, or a node declares a
            ``node_type`` that has no entry in ``_SHAPE``.
    """
    # Number of distinct loop ids in the topology (loops without an id all
    # collapse onto None, exactly as a dict keyed by id would).
    topo_loop_ids = {loop.get("id") for loop in topology.get("nodes", {}).get("loops", [])}

    # Single definition of the left→right lane order. It drives both the node
    # sort below and the published meta["swimlanes"], so the two cannot drift.
    swimlane_order = ["narrative", "prepro", "production", "review_export", "infra"]
    lane_rank = {name: i for i, name in enumerate(swimlane_order)}
    unknown_lane_rank = len(swimlane_order)  # unrecognised lanes sort last

    deprecated_ids = {n["id"] for n in overlay["nodes"] if n["status"] == "deprecated"}

    nodes = []
    for src_i, n in enumerate(overlay["nodes"]):
        node_id = n["id"]
        node_type = n["node_type"]
        try:
            shape = _SHAPE[node_type]
        except KeyError:
            # Same exception type as before, but name the offending node so
            # the overlay author can find it.
            raise KeyError(
                f"node {node_id!r}: unknown node_type {node_type!r} "
                f"(expected one of {sorted(_SHAPE)})"
            ) from None
        status = n["status"]
        nodes.append({
            "id": node_id,
            "label": n.get("label", node_id),
            "swimlane": n.get("phase_group", "infra"),
            "node_type": node_type,
            "shape": shape,
            "status": status,
            "is_human_review": node_type == "human_review",
            "is_substrate": status == "substrate_only",
            "is_deprecated": status == "deprecated",
            "gate": n.get("gate"),
            "inputs": n.get("inputs", []),
            "produces": n.get("produces", []),
            "loops": n.get("loops", []),
            "provenance": _parse_provenance(n),
            "source_index": src_i,
        })
    # Preserve the curated 4-agent flow order: bucket into swimlanes left→right,
    # keep the overlay's authored order WITHIN each lane. NEVER sort by id —
    # alphabetical order scrambles the flow and renders an incoherent atlas.
    nodes.sort(key=lambda x: (lane_rank.get(x["swimlane"], unknown_lane_rank), x["source_index"]))

    # An edge touching a deprecated node is itself dead-path → flag it so the viewer
    # dashes/de-emphasises it (a dead node must not read as a live stage).
    edges = sorted(
        ({"from": e["from"], "to": e["to"], "label": e.get("label", ""),
          "is_deprecated": e["from"] in deprecated_ids or e["to"] in deprecated_ids}
         for e in overlay["edges"]),
        key=lambda x: (x["from"], x["to"], x["label"]),
    )

    overlay_meta = overlay["meta"]
    return {
        "version": 1,
        "meta": {
            "node_count": overlay_meta["node_count"],
            "edge_count": overlay_meta["edge_count"],
            "swimlanes": list(swimlane_order),
            "substrate_only_nodes": sorted(overlay_meta["substrate_only_nodes"]),
            "human_review_nodes": sorted(overlay_meta["human_review_nodes"]),
            "deprecated_nodes": sorted(deprecated_ids),
            "topology_loop_count": len(topo_loop_ids),
        },
        "nodes": nodes,
        "edges": edges,
        "director_note_routes": overlay.get("director_note_routes", []),
    }


def _render() -> dict:
    """Load the overlay and topology from disk and build the render graph.

    Both inputs are decoded explicitly as UTF-8 rather than with the locale's
    preferred encoding, so non-ASCII labels in the overlay parse identically
    on every platform (a C/POSIX or cp1252 locale would otherwise fail or
    mis-decode them and produce spurious drift).

    Returns:
        The graph produced by ``build_render``.
    """
    overlay = yaml.safe_load(_OVERLAY.read_text(encoding="utf-8"))
    topology = json.loads(_TOPOLOGY.read_text(encoding="utf-8"))
    return build_render(overlay, topology)


def _serialize(graph: dict) -> str:
    return json.dumps(graph, indent=2, sort_keys=True, ensure_ascii=False) + "\n"


def _structural_errors(graph: dict) -> list[str]:
    errs = []
    if len(graph["nodes"]) != graph["meta"]["node_count"]:
        errs.append(f"node_count {graph['meta']['node_count']} != {len(graph['nodes'])} nodes")
    if len(graph["edges"]) != graph["meta"]["edge_count"]:
        errs.append(f"edge_count {graph['meta']['edge_count']} != {len(graph['edges'])} edges")
    humans = sorted(n["id"] for n in graph["nodes"] if n["is_human_review"])
    subs = sorted(n["id"] for n in graph["nodes"] if n["is_substrate"])
    deps = sorted(n["id"] for n in graph["nodes"] if n["is_deprecated"])
    if humans != graph["meta"]["human_review_nodes"]:
        errs.append(f"human_review_nodes mismatch: {humans} != {graph['meta']['human_review_nodes']}")
    if subs != graph["meta"]["substrate_only_nodes"]:
        errs.append(f"substrate_only_nodes mismatch: {subs} != {graph['meta']['substrate_only_nodes']}")
    if deps != graph["meta"]["deprecated_nodes"]:
        errs.append(f"deprecated_nodes mismatch: {deps} != {graph['meta']['deprecated_nodes']}")
    return errs


def main() -> int:
    """Command-line entry point: regenerate the artifact or check it for drift.

    Exactly one of ``--write`` / ``--check`` is required.

    * ``--write`` serialises the freshly built graph to ``_OUT`` and returns 0.
    * ``--check`` collects structural errors, compares the on-disk artifact to
      the freshly built payload, and reports unresolved provenance refs
      (a warning by default, an error with ``--strict``).

    The artifact is always read and written as UTF-8: the payload is produced
    with ``ensure_ascii=False``, so relying on the locale encoding could raise
    ``UnicodeEncodeError`` or yield platform-dependent bytes and false drift.

    Returns:
        0 on success, 1 if ``--check`` found any error.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a bullet list and example commands;
        # the default formatter would reflow them into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    g = ap.add_mutually_exclusive_group(required=True)
    g.add_argument("--write", action="store_true", help="regenerate generated/atlas.render.json")
    g.add_argument("--check", action="store_true", help="CI: fail on drift / structural error")
    ap.add_argument("--strict", action="store_true",
                    help="with --check, also hard-fail on unresolved provenance refs")
    args = ap.parse_args()

    graph = _render()
    payload = _serialize(graph)

    if args.write:
        _OUT.write_text(payload, encoding="utf-8")
        print(f"atlas.render.json written | {len(graph['nodes'])} nodes | {len(graph['edges'])} edges")
        return 0

    # --check
    errs = _structural_errors(graph)
    if not _OUT.exists():
        errs.append("generated/atlas.render.json missing — run --write")
    elif _OUT.read_text(encoding="utf-8") != payload:
        errs.append("atlas.render.json STALE vs overlay/topology — run --write")

    unresolved = [r["ref"] for n in graph["nodes"] for r in n["provenance"] if not r["resolved"]]
    if unresolved:
        # Show at most eight distinct refs to keep the message readable.
        msg = f"{len(unresolved)} unresolved provenance ref(s): {sorted(set(unresolved))[:8]}"
        if args.strict:
            errs.append(msg)
        else:
            print(f"WARN: {msg}", file=sys.stderr)

    if errs:
        for e in errs:
            print(f"ATLAS DRIFT: {e}", file=sys.stderr)
        return 1
    print(f"atlas render OK | {len(graph['nodes'])} nodes | no drift")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
