#!/usr/bin/env python3
"""Test-debt tripwire — fail when the failing-test set GROWS beyond a committed baseline.

Background (REC-153): the harness frozen-gate guard blocks dispatched builds from
editing/creating existing test files, so refactors land production code but orphan
the tests that asserted the old behavior. The old verify pattern only diffed
new-vs-baseline failures, so the rot accumulated silently (102 stale failures by
2026-06-13). This tripwire makes the baseline an EXPLICIT committed artifact: it runs
the two canonical suites, collects the FAILED/ERROR node ids, and exits non-zero if
any failing node is NOT already in the committed baseline (i.e. the failure set grew).

It is a tripwire, not a mask: it never weakens or skips a test. When tests are
legitimately fixed, shrink the baseline file in the SAME change (run with --update).
xfail/xpass are pytest-native and do NOT count as failures here — known prod bugs are
tracked via xfail(strict) + a Linear ticket, not via this baseline.

Usage:
    python3 recoil/pipeline/tools/check_test_baseline.py          # check (CI/local gate)
    python3 recoil/pipeline/tools/check_test_baseline.py --update # rewrite the baseline

Exit codes: 0 = no new failures (count did not grow); 1 = new failure(s); 2 = harness error.
"""
from __future__ import annotations

import argparse
import subprocess
import sys
from pathlib import Path

# Filesystem anchors, all derived from this file's own location:
#   recoil/pipeline/tools/check_test_baseline.py -> parents[2] is recoil/
RECOIL_ROOT = Path(__file__).resolve().parents[2]
# Directory that contains recoil/; pytest is launched from here.
REPO_ROOT = RECOIL_ROOT.parent
# The committed list of known-red node ids that the tripwire compares against.
BASELINE_PATH = RECOIL_ROOT.joinpath("pipeline", "tests", "_baseline_failures.txt")
# Suite paths handed to pytest, written relative to REPO_ROOT.
SUITES = [
    "recoil/execution/tests",
    "recoil/pipeline/tests",
]


def _collect_failures() -> set[str]:
    """Run the suites and return the set of FAILED/ERROR node ids.

    pytest is launched as a subprocess from ``REPO_ROOT`` (with ``PYTHONPATH``
    set to the repo root) and its captured stdout is scanned for summary lines
    that start with ``FAILED `` or ``ERROR ``. For each such line only the node
    id is kept; the trailing ``" - <message>"`` part is dropped.

    Returns:
        The node ids of all failing/erroring entries (empty when the run is
        green).

    Raises:
        SystemExit: with code 2 (harness error) when pytest exits non-zero but
            no failure line could be parsed. That covers runs that broke
            before producing a summary as well as runs where tests failed but
            the summary lines were missing or in an unexpected format.
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level imports do not include ``os``.
    import os

    proc = subprocess.run(
        [sys.executable, "-m", "pytest", *SUITES, "-q", "--no-header", "-p", "no:cacheprovider"],
        cwd=REPO_ROOT,
        env={**os.environ, "PYTHONPATH": str(REPO_ROOT)},
        capture_output=True,
        text=True,
    )

    failures: set[str] = set()
    for line in proc.stdout.splitlines():
        if not line.startswith(("FAILED ", "ERROR ")):
            continue
        # "FAILED recoil/...::node - AssertionError: ..." -> keep id only.
        # NOTE: an id that itself contains " - " is cut at that separator.
        node = line.split(" ", 1)[1].split(" - ", 1)[0].strip()
        if node:
            failures.add(node)

    # Fail closed: a non-zero exit code with nothing parsed must never be read
    # as "no failures". Previously exit code 1 (tests failed) with an
    # unparseable/suppressed summary yielded an empty set, which made the
    # tripwire pass and let --update wipe the baseline.
    if not failures and proc.returncode != 0:
        sys.stderr.write(
            f"harness error: pytest exited with code {proc.returncode} but no "
            "FAILED/ERROR summary lines were parsed\n"
        )
        sys.stderr.write(proc.stdout[-4000:] + "\n" + proc.stderr[-2000:] + "\n")
        raise SystemExit(2)
    return failures


def _read_baseline() -> set[str]:
    """Return the node ids listed in the committed baseline file.

    Each line is stripped of surrounding whitespace; blank lines and lines
    whose first non-blank character is ``#`` are ignored. A missing baseline
    file is treated as an empty baseline.

    Returns:
        The set of baselined (known-red) node ids.
    """
    if not BASELINE_PATH.exists():
        return set()
    # Strip BEFORE testing for '#', so an indented comment line is not
    # mistaken for a node id.
    stripped_lines = (raw.strip() for raw in BASELINE_PATH.read_text().splitlines())
    return {entry for entry in stripped_lines if entry and not entry.startswith("#")}


def main() -> int:
    """Command-line entry point: check the failing set against the baseline.

    Without flags, runs the suites and compares the current failing node ids
    with the committed baseline. With ``--update``, rewrites the baseline file
    to the current failing set instead.

    Returns:
        0 when no failing node is missing from the baseline (or after a
        successful ``--update``); 1 when at least one new failure exists.
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        # The module docstring contains a preformatted Usage block and exit
        # code list; the default formatter would reflow them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument("--update", action="store_true", help="rewrite the baseline to the current failing set")
    args = ap.parse_args()

    current = _collect_failures()

    if args.update:
        # Sorted, one id per line, so baseline diffs stay small and stable.
        body = "\n".join(sorted(current))
        BASELINE_PATH.write_text(
            "# Committed test-debt baseline (REC-153). Failing FAILED/ERROR node ids that\n"
            "# are KNOWN-RED. The tripwire (check_test_baseline.py) fails if the failing set\n"
            "# grows beyond this. Shrink this file in the SAME change that fixes a test.\n"
            "# Target: empty. Known prod-bug reds use xfail(strict) + a Linear ticket, NOT this file.\n"
            + (body + "\n" if body else "")
        )
        print(f"baseline updated: {len(current)} known-red node(s) -> {BASELINE_PATH}")
        return 0

    baseline = _read_baseline()
    new_failures = sorted(current - baseline)  # failing now, not baselined
    fixed = sorted(baseline - current)  # baselined, no longer failing

    # Fixed entries are informational only; they never change the exit code.
    if fixed:
        print(f"NOTE: {len(fixed)} baselined failure(s) now PASS — shrink the baseline (--update):")
        for n in fixed:
            print(f"  fixed: {n}")
    if new_failures:
        print(f"TRIPWIRE: {len(new_failures)} NEW failing test(s) not in the committed baseline:")
        for n in new_failures:
            print(f"  NEW FAIL: {n}")
        print("\nA refactor likely orphaned these tests. Fix them to assert current behavior")
        print("(or, for a genuine prod bug, xfail(strict) + file a Linear ticket). Do NOT add")
        print("them to the baseline to silence this.")
        return 1

    print(f"OK: {len(current)} failing node(s), all within the committed baseline (no growth).")
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
