#!/usr/bin/env python3
"""
verify-laws gate: every emit_fallback("…") or fire_sanctioned_fallback("…")
call site across recoil/api/**, recoil/pipeline/**, and recoil/execution/**
must use a name that exists in the canonical SSOT registry at
recoil/pipeline/_lib/sanctioned_fallbacks.py (FallbackRecord shape).

Mirrors scripts/verify_laws/check_fallback_registration.mjs (TS-side parity
for Tenet 6 / Law 4 / Law 12).

Exit codes:
  0 — OK
  1 — drift (offending call sites listed)
  2 — registry file missing, or no fallback names could be parsed from it
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

# Repository root, taken as the fifth ancestor directory of this file's
# resolved path. NOTE(review): parents[4] hard-codes how deep this script
# sits in the tree -- confirm/update it if the file is ever moved.
REPO_ROOT = Path(__file__).resolve().parents[4]
# Canonical registry file; its FallbackRecord(...) names are the primary
# source of registered fallback names.
REGISTRY_PATH = REPO_ROOT / "recoil" / "pipeline" / "_lib" / "sanctioned_fallbacks.py"

# Directory trees that are walked recursively for *.py files, both when
# collecting inline registrations and when looking for call sites.
SCAN_ROOTS = [
    REPO_ROOT / "recoil" / "api",
    REPO_ROOT / "recoil" / "pipeline",
    REPO_ROOT / "recoil" / "execution",
]


# FallbackRecord(...name="<value>"...) -- accepts ' or " quoting and spans
# newlines (the negated class `[^)]` and `\s` both match "\n").
#
# The `\b` before `name` is load-bearing: without it the lazy `[^)]*?` stops at
# the first keyword that merely *ends* in "name" (e.g. `display_name=`) and the
# wrong literal is captured as the registered name.
#
# Known limitation: `[^)]*?` cannot step over a ")" so a record whose earlier
# arguments contain a nested call (e.g. `scope=Scope("x"), name="y"`) is not
# matched. Put `name=` first in such records.
_FALLBACK_RECORD_NAME_RE = re.compile(
    r"FallbackRecord\([^)]*?\bname\s*=\s*(['\"])([^'\"]+)\1",
    re.DOTALL,
)


def parse_registry(text: str) -> set[str]:
    """Pull every `name="…"` literal inside a FallbackRecord( … ) constructor.

    The canonical registry uses register_sanctioned_fallback(FallbackRecord(...))
    calls. We scan for the FallbackRecord constructor literal directly, which
    lets us collect every entry name regardless of call-wrapping shape.

    Only the exact keyword `name` is considered; keywords such as
    `display_name=` or `short_name=` are ignored.

    Args:
        text: Source text to scan. This is a purely textual (regex) scan, so
            the text does not need to be valid Python.

    Returns:
        The set of string literals passed as `name=` to FallbackRecord(...);
        empty when nothing matches.
    """
    return {m.group(2) for m in _FALLBACK_RECORD_NAME_RE.finditer(text)}


def collect_all_registered_names() -> set[str]:
    """Build the full set of registered fallback names by scanning REGISTRY_PATH
    AND all non-test Python files in SCAN_ROOTS for FallbackRecord(...) constructors.

    This captures dynamically-registered fallbacks (e.g. run_shot.py) that
    live outside the canonical registry file but still go through
    register_sanctioned_fallback(FallbackRecord(...)).

    A file counts as a test file when one of its directories *below the scan
    root* is named ``tests``. The check is deliberately made on the path
    relative to the scan root: matching "/tests/" against the absolute path
    would silently skip every file whenever the repository happens to be
    checked out underneath a directory called ``tests``.

    Returns:
        The union of all names found; empty if nothing could be parsed.
    """
    names: set[str] = set()
    if REGISTRY_PATH.exists():
        names |= parse_registry(REGISTRY_PATH.read_text(encoding="utf-8"))
    for root in SCAN_ROOTS:
        for path in root.rglob("*.py"):
            if path == REGISTRY_PATH:
                continue  # already read above; avoid double-parse
            # parts[:-1] drops the file name, leaving only directories under root.
            if "tests" in path.relative_to(root).parts[:-1]:
                continue
            try:
                text = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # Best-effort scan: unreadable / non-UTF-8 files contribute no names.
                continue
            names |= parse_registry(text)
    return names


# emit_fallback("<name>" …) / fire_sanctioned_fallback("<name>" …) with the
# name as a quoted first positional argument. The `\s*` after the opening
# parenthesis is what lets the literal sit on a following line.
_CALL_RE = re.compile(
    r"(?:emit_fallback|fire_sanctioned_fallback)\(\s*(['\"])([^'\"]+)\1",
    re.DOTALL,
)


def find_call_sites(scan_root: Path | None = None) -> list[tuple[Path, int, str]]:
    """Grep emit_fallback("…") or fire_sanctioned_fallback("…") across
    configured SCAN_ROOTS (or a single overriding scan_root for tests).
    Skips the registry file itself + any tests/ directory.

    The regex tolerates whitespace/newlines between the opening parenthesis
    and the name literal, so calls of the form

        fire_sanctioned_fallback(
            "name",
            scope=...,
        )

    are matched. The line number is recovered from the offset of the match
    so the operator-visible error includes a real :line (the line on which
    the function name starts).

    A file is treated as a test file when one of its directories *below the
    scan root* is named ``tests``. The check uses the path relative to the
    scan root on purpose: matching "/tests/" against the absolute path would
    skip every file -- and let the gate pass vacuously -- whenever the
    checkout (or an explicit scan_root) lives underneath a directory called
    ``tests``.

    Args:
        scan_root: Optional single directory to scan instead of SCAN_ROOTS.

    Returns:
        A list of (file path, 1-based line number, fallback name) tuples.
        Files are visited in sorted path order within each root so the
        report is deterministic across filesystems.
    """
    roots = SCAN_ROOTS if scan_root is None else [scan_root]
    out: list[tuple[Path, int, str]] = []
    for root in roots:
        for path in sorted(root.rglob("*.py")):
            if path == REGISTRY_PATH:
                continue
            # parts[:-1] drops the file name, leaving only directories under root.
            if "tests" in path.relative_to(root).parts[:-1]:
                continue
            try:
                text = path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # Best-effort scan: unreadable / non-UTF-8 files are skipped.
                continue
            for m in _CALL_RE.finditer(text):
                # Number of newlines before the match start, converted to 1-based.
                line_no = text.count("\n", 0, m.start()) + 1
                out.append((path, line_no, m.group(2)))
    return out


def main() -> int:
    """Run the gate and return the process exit code.

    Returns:
        2 if the registry file is missing or no registered names were found
        (both reported on stderr), 1 if at least one call site uses a name
        that is not registered (offenders are listed on stdout), 0 otherwise.
    """
    if not REGISTRY_PATH.exists():
        print(f"ERROR: registry not found at {REGISTRY_PATH}", file=sys.stderr)
        return 2

    # Known names = canonical registry file plus inline
    # register_sanctioned_fallback(...) registrations in consumer files
    # (e.g. run_shot.py), so those are not reported as drift.
    known_names = collect_all_registered_names()
    if not known_names:
        print("ERROR: REGISTRY parsed as empty (regex drift?)", file=sys.stderr)
        return 2

    call_sites = find_call_sites()
    offenders = [site for site in call_sites if site[2] not in known_names]

    if not offenders:
        print(
            f"OK -- {len(call_sites)} static call(s) match registry "
            f"({len(known_names)} names)"
        )
        return 0

    print("Tenet 6 violation: emit_fallback()/fire_sanctioned_fallback() called with name not in registry")
    print(f"Registered names: {', '.join(sorted(known_names))}")
    for file_path, line_no, fallback_name in offenders:
        # Show repo-relative paths when possible; fall back to the raw path
        # for files that live outside REPO_ROOT.
        try:
            shown = file_path.relative_to(REPO_ROOT)
        except ValueError:
            shown = file_path
        print(f"  {shown}:{line_no}  -> unregistered name: {fallback_name!r}")
    return 1

if __name__ == "__main__":
    sys.exit(main())
