"""Tests for nightwatch.py (Phase 1: ledger, normalization, ingest)."""

from __future__ import annotations

import json
import pathlib
import re
import shlex
import types

import pytest

from recoil.pipeline.tools import nightwatch as nw
from recoil.pipeline.tools import linear_queue as lq


# ---------------------------------------------------------------------------
# Constants.
# ---------------------------------------------------------------------------
def test_default_repo_root_resolves_to_repo_root():
    """DEFAULT_REPO_ROOT is the directory that holds the ``recoil/`` package.

    The check does not depend on the checkout directory's name: joining the
    package-relative location of nightwatch.py onto DEFAULT_REPO_ROOT must land
    on the module file that was actually imported, so it also holds in CI temp
    dirs and review worktrees.
    """
    imported_module = pathlib.Path(nw.__file__).resolve()
    expected_location = nw.DEFAULT_REPO_ROOT.joinpath(
        "recoil", "pipeline", "tools", "nightwatch.py"
    )
    assert expected_location.resolve() == imported_module


def test_utc_now_iso_has_z_suffix():
    """utc_now_iso() ends in ``Z`` and never contains a ``+00:00`` offset."""
    stamp = nw.utc_now_iso()
    has_z_suffix = stamp.endswith("Z")
    assert has_z_suffix
    assert "+00:00" not in stamp


# ---------------------------------------------------------------------------
# finding_key stability + sensitivity.
# ---------------------------------------------------------------------------
# Baseline derive_finding_key inputs; _fk() overrides individual fields of this.
_FK_BASE = {
    "category": "SSOT",
    "file": "recoil/pipeline/tools/consult.py",
    "claim_signature": "resolve",
}


def _fk(**overrides) -> str:
    """Derive a finding_key from ``_FK_BASE`` with *overrides* layered on top."""
    merged = dict(_FK_BASE)
    merged.update(overrides)
    return nw.derive_finding_key(
        merged["category"], merged["file"], merged["claim_signature"]
    )


def test_finding_key_stable_when_only_line_changes():
    """Identical inputs give identical keys; line is not an input to the key at all."""
    first_derivation = _fk()
    second_derivation = _fk()
    assert first_derivation == second_derivation


def test_finding_key_changes_when_category_changes():
    """Swapping the category produces a different finding_key."""
    baseline = _fk()
    recategorized = _fk(category="bug")
    assert baseline != recategorized


def test_finding_key_category_case_insensitive():
    """Category casing does not affect the finding_key."""
    upper_key = _fk(category="SSOT")
    lower_key = _fk(category="ssot")
    assert upper_key == lower_key


def test_finding_key_changes_when_file_changes():
    """Pointing at a different file produces a different finding_key."""
    baseline = _fk()
    other_file = _fk(file="recoil/pipeline/tools/other.py")
    assert baseline != other_file


def test_finding_key_changes_when_claim_signature_changes():
    """A different claim signature produces a different finding_key."""
    baseline = _fk()
    other_claim = _fk(claim_signature="other_resolver")
    assert baseline != other_claim


def test_claim_signature_title_independent_and_distinguishing():
    """Pin derive_claim_signature on cited symbols, prose-only text, and empty input."""
    sig = lq.derive_claim_signature
    # nightwatch must expose the very same function object as linear_queue.
    assert nw.derive_claim_signature is sig

    # Same cited symbol, different surrounding prose -> same signature.
    assert sig("The `asset_kind_dir` call fails here.") == sig(
        "Later prose still cites `asset_kind_dir`."
    )
    # Different cited symbols -> different signatures.
    assert sig("The `asset_kind_dir` call fails.") != sig(
        "The `resolve_entity_refs` call fails."
    )

    # Evidence text -> exact expected signature.
    exact_cases = [
        ('calls `_project_paths.asset_kind_dir("identity", cid)`', "asset_kind_dir"),
        ("Only `_private_z` and `_private_a` are cited.", "_private_a"),
        ("calls `ProjectPaths.resolve_ref(...)`", "resolve_ref"),
        ("calls `ProjectPaths.resolve_hero(...)`", "resolve_hero"),
        ("uses `ProjectPaths` directly", "projectpaths"),
        ('calls `ProjectPaths.resolve_ref("aaa_flag")`', "resolve_ref"),
    ]
    for text, expected in exact_cases:
        assert sig(text) == expected

    # A backticked `True` yields a "prose:" signature that still tracks the prose.
    literal_only = sig("The value is `True`, which skips validation.")
    assert literal_only.startswith("prose:")
    validation_sig = sig("The value is `True`, which skips validation.")
    cleanup_sig = sig("The value is `True`, which skips cleanup.")
    assert validation_sig != cleanup_sig

    # Text without backticks: "prose:" prefix, distinguishing, and repeatable.
    timeout_sig = sig("missing timeout guard in the worker")
    retry_sig = sig("missing retry guard in the worker")
    assert timeout_sig.startswith("prose:")
    assert retry_sig.startswith("prose:")
    assert timeout_sig != retry_sig
    assert timeout_sig == sig("missing timeout guard in the worker")

    # Empty and None inputs both map to the empty signature.
    assert sig("") == ""
    assert sig(None) == ""  # type: ignore[arg-type]


# Two independently worded evidence paragraphs for the same finding. Both cite
# `asset_kind_dir` in backticks; test_finding_key_stable_across_title_rephrasing
# expects each of them to reduce to the claim signature "asset_kind_dir".
WAN_EVIDENCE_A = (
    '_resolve_wan_character_refs() says it checks the canonical layout, then executes '
    '`char_dir = _project_paths.asset_kind_dir("identity", cid)`. The manifest lists '
    '`recoil/core/paths.py::ProjectPaths.asset_kind_dir` under deprecated_paths; the '
    'implementation raises DeprecatedPathAPIError, so the live `--wan-refs` path at '
    'lines 2927-2943 fails before it can use either canonical refs or client fallback'
)
# Second wording: different prose, different line references, and additional
# backticked spans, but the same `asset_kind_dir` call is cited.
WAN_EVIDENCE_B = (
    '_resolve_wan_character_refs documents an obsolete lookup, "Canonical Tartarus '
    'pipeline output: assets/identity/{cid}/{cid}_front.{ext}" (lines 774-776), then '
    'executes `char_dir = _project_paths.asset_kind_dir("identity", cid)` at line 792. '
    'The manifest says v3 refs live under `assets/{char,loc,prop}/<slug>/base/pool/...` '
    'and `asset_kind_dir()` is deprecated/raising, so the Wan R2V CLI path can fail'
)


def test_finding_key_stable_across_title_rephrasing():
    """Both Wan evidence wordings share one signature and therefore one finding_key."""
    target = "recoil/pipeline/tools/dispatch_cli.py"
    signatures = [
        lq.derive_claim_signature(text) for text in (WAN_EVIDENCE_A, WAN_EVIDENCE_B)
    ]
    assert signatures == ["asset_kind_dir", "asset_kind_dir"]
    key_a, key_b = [nw.derive_finding_key("bug", target, s) for s in signatures]
    assert key_a == key_b


def test_finding_key_distinct_for_distinct_claims_same_file():
    """Three unrelated claims against one file yield three finding_keys."""
    target = "recoil/pipeline/tools/dispatch_cli.py"
    claims = [
        ("bug", "Wan path calls `asset_kind_dir` and fails."),
        (
            "ssot",
            "Forked resolver `_resolve_client_frontal()` bypasses `resolve_entity_refs` "
            "and `ProjectPaths.resolve_ref`.",
        ),
        (
            "bug",
            'R2V builds `PayloadContext(modality="r2v_multi", cid=cid)`, stamps '
            '`r2v_payload["r2v_multi"] = True`, and calls '
            '`dispatch("video_i2v", r2v_payload, context=ctx)`.',
        ),
    ]
    distinct_keys = set()
    for category, text in claims:
        signature = lq.derive_claim_signature(text)
        distinct_keys.add(nw.derive_finding_key(category, target, signature))
    assert len(distinct_keys) == 3


def test_finding_key_distinct_for_distinct_prose_only_claims():
    """Prose-only claims stay distinguishable; differing line numbers alone do not."""
    sig = lq.derive_claim_signature
    target = "recoil/pipeline/tools/dispatch_cli.py"

    timeout_sig = sig("missing timeout guard in worker")
    retry_sig = sig("missing retry guard in worker")
    for prose_sig in (timeout_sig, retry_sig):
        assert prose_sig.startswith("prose:")
    assert timeout_sig != retry_sig

    timeout_key = nw.derive_finding_key("bug", target, timeout_sig)
    retry_key = nw.derive_finding_key("bug", target, retry_sig)
    assert timeout_key != retry_key

    # Differing file paths inside the prose distinguish two signatures...
    assert sig("loads config/a.yml at startup") != sig("loads config/b.yml at startup")
    # ...but a differing line number alone does not.
    assert sig("missing guard at line 12") == sig("missing guard at line 88")


def test_claim_signature_known_collapse_same_public_symbol():
    """Characterize the known collapse: two distinct bugs citing one symbol share a key.

    JT flag 1 / OFF default characterization. The in-key signature is lossy, so
    both claims below map to the same finding_key; Phase 1b surfaces the grouped
    observations in the report's Possible collapse section so the shared key is
    visible to humans.
    """
    target = "recoil/pipeline/tools/dispatch_cli.py"
    collapsed_keys = [
        nw.derive_finding_key("bug", target, lq.derive_claim_signature(text))
        for text in (
            "`asset_kind_dir` raises too early.",
            "`asset_kind_dir` returns stale dirs.",
        )
    ]
    assert collapsed_keys[0] == collapsed_keys[1]


# ---------------------------------------------------------------------------
# observation_id sensitivity.
# ---------------------------------------------------------------------------
def test_observation_id_changes_across_run_report_evidence():
    """Changing the run, report, or evidence argument changes the observation id."""
    base_args = ("run-A", "head1", "report-A", "rec1", "ev1")
    base = nw.derive_observation_id(*base_args)
    variants = [
        ("run-B", "head1", "report-A", "rec1", "ev1"),
        ("run-A", "head1", "report-B", "rec1", "ev1"),
        ("run-A", "head1", "report-A", "rec1", "ev2"),
    ]
    for variant in variants:
        assert base != nw.derive_observation_id(*variant)
    # Re-deriving from the unchanged inputs reproduces the same id.
    assert base == nw.derive_observation_id(*base_args)


# ---------------------------------------------------------------------------
# normalize_text.
# ---------------------------------------------------------------------------
def test_normalize_text_collapses_and_strips():
    """Whitespace runs (spaces, tabs, newlines) collapse to one space; the ends are trimmed."""
    messy = "  a   b\t c \n d "
    assert nw.normalize_text(messy) == "a b c d"


def test_normalize_text_handles_none_and_empty():
    """Both the empty string and None normalize to the empty string."""
    from_empty = nw.normalize_text("")
    assert from_empty == ""
    from_none = nw.normalize_text(None)  # type: ignore[arg-type]
    assert from_none == ""


# ---------------------------------------------------------------------------
# Anchor extraction.
# ---------------------------------------------------------------------------
def test_anchor_prefers_longest_quoted_span_present_in_file(tmp_path):
    """Of two backticked spans in the evidence, the longer one is chosen as the anchor."""
    module = tmp_path / "mod.py"
    module.write_text("def resolve(self):\n    return self.path\n", encoding="utf-8")
    anchor, blocked = nw.extract_normalized_anchor(
        module,
        "the `resolve` method and `def resolve(self):` are wrong",
        advisory_line=1,
    )
    assert (anchor, blocked) == ("def resolve(self):", [])


def test_anchor_falls_back_to_excerpt_when_no_file(tmp_path):
    """With no file and no backticked span, the evidence itself is the anchor and the result is flagged."""
    prose = "some prose with no backticked code"
    anchor, blocked = nw.extract_normalized_anchor(None, prose, advisory_line=None)
    assert anchor == prose
    assert blocked == ["anchor_not_found"]


def test_anchor_marks_missing_file(tmp_path):
    """A path that does not exist is reported as both missing_file and anchor_not_found."""
    absent = tmp_path / "nope.py"
    _anchor, blocked = nw.extract_normalized_anchor(absent, "evidence text", advisory_line=5)
    for reason in ("missing_file", "anchor_not_found"):
        assert reason in blocked


def test_anchor_window_finds_symbol_def(tmp_path):
    """Without a quoted span, an advisory line near a class definition anchors on that definition."""
    module = tmp_path / "mod.py"
    body = "import os\n\n\nclass Widget:\n    x = 1\n    y = 2\n"
    module.write_text(body, encoding="utf-8")
    # The evidence has no backticks; line 5 sits just below ``class Widget:``.
    anchor, blocked = nw.extract_normalized_anchor(
        module, "broken state here", advisory_line=5
    )
    assert (anchor, blocked) == ("class Widget:", [])


# ---------------------------------------------------------------------------
# Ledger I/O.
# ---------------------------------------------------------------------------
def test_append_event_preserves_existing_lines(tmp_path):
    """append_event adds a line after existing ledger content without rewriting it."""
    ledger = tmp_path / "sub" / "events.jsonl"
    ledger.parent.mkdir(parents=True, exist_ok=True)
    sentinel = '{"sentinel": true}'
    ledger.write_text(sentinel + "\n", encoding="utf-8")

    nw.append_event(ledger, {"hello": "world"})

    written = ledger.read_text(encoding="utf-8").splitlines()
    existing, appended = written[0], written[1]
    assert existing == sentinel
    assert json.loads(appended) == {"hello": "world"}


def test_append_event_creates_parent_and_writes_valid_jsonl(tmp_path):
    """append_event works when parent dirs are absent and emits one JSON document per line."""
    ledger = tmp_path / "a" / "b" / "events.jsonl"
    for n in (1, 2):
        nw.append_event(ledger, {"n": n})
    written = ledger.read_text(encoding="utf-8").splitlines()
    assert len(written) == 2
    for raw in written:
        json.loads(raw)  # raises if the line is not valid JSON


def test_load_events_missing_returns_empty(tmp_path):
    """Loading a ledger path that does not exist returns an empty list."""
    absent = tmp_path / "absent.jsonl"
    loaded = nw.load_events(absent)
    assert loaded == []


def test_load_events_tolerates_trailing_blank_lines(tmp_path):
    """Blank lines between and after records are skipped when loading."""
    ledger = tmp_path / "events.jsonl"
    ledger.write_text('{"a": 1}\n\n{"b": 2}\n\n\n', encoding="utf-8")
    assert nw.load_events(ledger) == [{"a": 1}, {"b": 2}]


# ---------------------------------------------------------------------------
# codex_audit ingestion.
# ---------------------------------------------------------------------------
def _write_findings(tmp_path, findings, head_sha="abc1234"):
    """Write a findings JSON document under *tmp_path* and return the file's path.

    The document has three root keys: ``subsystem`` (fixed to
    ``recoil/pipeline/tools``), ``head_sha``, and ``findings``.
    """
    document = {
        "subsystem": "recoil/pipeline/tools",
        "head_sha": head_sha,
        "findings": findings,
    }
    target = tmp_path / "findings-recoil-pipeline-tools-20260603-000000.json"
    target.write_text(json.dumps(document), encoding="utf-8")
    return target


def test_ingest_codex_audit_derives_identifiers(tmp_path):
    """Ingest derives its own identifiers and normalizes the payload fields.

    The input finding carries bogus ``finding_key`` / ``observation_id`` values
    to prove that ingest does not copy them through.
    """
    bogus = "SHOULD_BE_IGNORED"
    raw_finding = {
        "severity": "HIGH",
        "category": "SSOT",
        "law_or_rule": "SSOT-1",
        "file": "recoil/pipeline/tools/consult.py",
        "line": 63,
        "title": "Duplicated resolver",
        "evidence": "two homes for the path",
        "recommendation": "merge them",
        "effort": "M",
        "confidence": "high",
        # Extra keys supplied on purpose; ingest must not trust them.
        "finding_key": bogus,
        "observation_id": bogus,
    }
    audit_file = _write_findings(tmp_path, [raw_finding])
    events = nw.ingest_codex_audit(audit_file, "nightwatch-run", nw.DEFAULT_REPO_ROOT)
    assert len(events) == 1
    event = events[0]

    for identifier in ("finding_key", "observation_id"):
        assert event[identifier] and event[identifier] != bogus
    assert event["event_type"] == "observed"
    assert event["schema_version"] == 1
    assert event["event_id"].startswith("sha256:")

    expected_payload = {
        "source_type": "codex_audit",
        "category": "ssot",
        "risk_class": "escalation",
        "classification": "would_escalate",
        "file": "recoil/pipeline/tools/consult.py",
    }
    payload = event["payload"]
    for field, value in expected_payload.items():
        assert payload[field] == value

    # The head_sha comes from the root of the findings JSON.
    assert event["head_sha"] == "abc1234"


def test_ingest_codex_audit_ignores_line_as_identity(tmp_path):
    """Findings that differ only in ``line`` share a finding_key but not an observation_id."""
    target = tmp_path / "target.py"
    target.write_text("def resolve(self):\n    return self.path\n", encoding="utf-8")

    template = {
        "severity": "HIGH",
        "category": "bug",
        "law_or_rule": None,
        "file": "target.py",
        "line": None,
        "title": "issue",
        "evidence": "the `def resolve(self):` is wrong",
        "recommendation": "fix it",
        "effort": "S",
        "confidence": "high",
    }
    # dict(template, line=n) overrides "line" in place, so key order is unchanged.
    findings = [dict(template, line=n) for n in (1, 2)]

    audit_file = _write_findings(tmp_path, findings)
    events = nw.ingest_codex_audit(audit_file, "run-x", tmp_path)
    assert len(events) == 2
    first, second = events
    # Everything except the line is identical, so the finding_key matches.
    assert first["finding_key"] == second["finding_key"]
    # The per-observation id still differs: the source record (which includes
    # the line) is not the same for the two findings.
    assert first["observation_id"] != second["observation_id"]


def test_ingest_codex_audit_efficiency_is_report(tmp_path):
    """An ``efficiency`` finding is classified as report-only."""
    raw_finding = {
        "severity": "LOW",
        "category": "efficiency",
        "law_or_rule": None,
        "file": "recoil/pipeline/tools/consult.py",
        "line": 10,
        "title": "redundant IO",
        "evidence": "reads the file twice",
        "recommendation": "cache it",
        "effort": "S",
        "confidence": "medium",
    }
    audit_file = _write_findings(tmp_path, [raw_finding])
    events = nw.ingest_codex_audit(audit_file, "run", nw.DEFAULT_REPO_ROOT)
    payload = events[0]["payload"]
    for field in ("risk_class", "classification"):
        assert payload[field] == "report"


def test_ingest_codex_audit_finding_key_title_independent(tmp_path):
    """The title does not feed the finding_key; the symbol cited in the evidence does."""
    template = {
        "severity": "HIGH",
        "category": "bug",
        "law_or_rule": None,
        "file": "mod.py",
        "line": 1,
        "title": None,
        "evidence": None,
        "recommendation": "fix",
    }
    variations = [
        ("old title", "The `asset_kind_dir` call fails."),
        ("new title", "Different prose still cites `asset_kind_dir`."),
        ("other title", "The `resolve_entity_refs` call fails."),
    ]
    # Overriding existing keys keeps the template's key order.
    findings = [
        dict(template, title=title, evidence=evidence) for title, evidence in variations
    ]

    audit_file = _write_findings(tmp_path, findings)
    events = nw.ingest_codex_audit(audit_file, "run", tmp_path)
    same_symbol_a, same_symbol_b, other_symbol = (
        events[0]["finding_key"],
        events[1]["finding_key"],
        events[2]["finding_key"],
    )
    assert same_symbol_a == same_symbol_b
    assert same_symbol_a != other_symbol


def test_ingest_codex_audit_empty_evidence_finding_keys_distinct(tmp_path):
    """Empty-evidence findings at different anchors still get distinct finding_keys."""
    module = tmp_path / "mod.py"
    source_text = "".join(
        [
            "def foo_a():\n",
            "    return 1\n",
            "\n" * 30,
            "def foo_b():\n",
            "    return 2\n",
        ]
    )
    module.write_text(source_text, encoding="utf-8")

    # Precondition: the two advisory lines resolve to different, non-empty anchors.
    anchor_a, _ = nw.extract_normalized_anchor(module, "", 2)
    anchor_b, _ = nw.extract_normalized_anchor(module, "", 34)
    assert anchor_a
    assert anchor_b
    assert anchor_a != anchor_b

    template = {
        "severity": "HIGH",
        "category": "bug",
        "law_or_rule": None,
        "file": "mod.py",
        "line": None,
        "title": "empty evidence",
        "evidence": "",
        "recommendation": "fix",
    }
    findings = [dict(template, line=n) for n in (2, 34)]

    audit_file = _write_findings(tmp_path, findings)
    events = nw.ingest_codex_audit(audit_file, "run", tmp_path)
    assert len(events) == 2
    first, second = events
    assert first["payload"]["normalized_anchor"] == anchor_a
    assert second["payload"]["normalized_anchor"] == anchor_b
    assert first["finding_key"] != second["finding_key"]


# ---------------------------------------------------------------------------
# hygiene queue ingestion.
# ---------------------------------------------------------------------------
def test_ingest_hygiene_queue_parses_open_item(tmp_path):
    """One well-formed item under ``## Open`` becomes one mechanical hygiene event."""
    queue_text = (
        "# Hygiene Queue\n\n"
        "Some intro prose | with a pipe but not an open item.\n\n"
        "## Open\n"
        "- 2026-06-02 | recoil/pipeline/tools/consult.py:63,66 | ruff E402 (import not at top) | "
        "pre-existing. Fix: noqa.\n"
    )
    queue_file = tmp_path / "hygiene-queue.md"
    queue_file.write_text(queue_text, encoding="utf-8")

    events = nw.ingest_hygiene_queue(queue_file, "run-h", nw.DEFAULT_REPO_ROOT)
    assert len(events) == 1
    event = events[0]
    payload = event["payload"]

    expected_payload = {
        "category": "hygiene",
        "risk_class": "mechanical",
        "classification": "would_fix",
        "file": "recoil/pipeline/tools/consult.py",
        "line": 63,  # first integer of "63,66"
    }
    for field, value in expected_payload.items():
        assert payload[field] == value
    assert payload["law_or_rule"] is None
    for identifier in ("finding_key", "observation_id"):
        assert event[identifier]


def test_ingest_hygiene_queue_ignores_non_item_lines(tmp_path):
    """A bullet with too few ``|`` separators is skipped; the well-formed one is kept."""
    malformed = "- not a fileline | only two | fields\n"  # only 2 separators -> ignored
    well_formed = "- 2026-06-01 | path/to/file.py:12 | issue text | why deferred\n"
    queue_file = tmp_path / "hygiene-queue.md"
    queue_file.write_text("## Open\n" + malformed + well_formed, encoding="utf-8")

    events = nw.ingest_hygiene_queue(queue_file, "run", nw.DEFAULT_REPO_ROOT)
    assert len(events) == 1
    payload = events[0]["payload"]
    assert payload["file"] == "path/to/file.py"
    assert payload["line"] == 12


def test_ingest_hygiene_queue_finding_key_title_independent(tmp_path):
    """Items citing the same symbol share a finding_key; a different symbol does not."""
    items = [
        "- 2026-06-01 | mod.py:1 | The `asset_kind_dir` call fails | fix A\n",
        "- 2026-06-01 | mod.py:2 | Different prose still cites `asset_kind_dir` | fix B\n",
        "- 2026-06-01 | mod.py:3 | The `resolve_entity_refs` call fails | fix C\n",
    ]
    queue_file = tmp_path / "hygiene-queue.md"
    queue_file.write_text("## Open\n" + "".join(items), encoding="utf-8")

    events = nw.ingest_hygiene_queue(queue_file, "run", tmp_path)
    same_symbol_a = events[0]["finding_key"]
    same_symbol_b = events[1]["finding_key"]
    other_symbol = events[2]["finding_key"]
    assert same_symbol_a == same_symbol_b
    assert same_symbol_a != other_symbol


def test_ingest_hygiene_queue_empty_evidence_finding_keys_distinct(tmp_path):
    """Hygiene items with an empty issue field at different anchors get distinct finding_keys."""
    module = tmp_path / "mod.py"
    source_text = "".join(
        [
            "def foo_a():\n",
            "    return 1\n",
            "\n" * 30,
            "def foo_b():\n",
            "    return 2\n",
        ]
    )
    module.write_text(source_text, encoding="utf-8")

    # Precondition: the two advisory lines resolve to different, non-empty anchors.
    anchor_a, _ = nw.extract_normalized_anchor(module, "", 2)
    anchor_b, _ = nw.extract_normalized_anchor(module, "", 34)
    assert anchor_a
    assert anchor_b
    assert anchor_a != anchor_b

    queue_text = (
        "## Open\n"
        "- 2026-06-01 | mod.py:2 |  | why A\n"
        "- 2026-06-01 | mod.py:34 |  | why B\n"
    )
    queue_file = tmp_path / "hygiene-queue.md"
    queue_file.write_text(queue_text, encoding="utf-8")

    events = nw.ingest_hygiene_queue(queue_file, "run", tmp_path)
    assert len(events) == 2
    first, second = events
    assert first["payload"]["normalized_anchor"] == anchor_a
    assert second["payload"]["normalized_anchor"] == anchor_b
    assert first["finding_key"] != second["finding_key"]


# ---------------------------------------------------------------------------
# ruff ingestion (monkeypatched subprocess).
# ---------------------------------------------------------------------------
def test_ingest_ruff_parses_despite_nonzero_exit(tmp_path, monkeypatch):
    """ruff's JSON output is still parsed when the fake process reports exit code 1."""
    violation = {
        "code": "E402",
        "message": "module import not at top of file",
        "filename": str(tmp_path / "mod.py"),
        "location": {"row": 5, "column": 1},
        "fix": None,
    }
    ruff_stdout = json.dumps([violation])

    # Replace subprocess.run with a fake returning the canned output above.
    monkeypatch.setattr(
        nw.subprocess,
        "run",
        lambda cmd, **kwargs: types.SimpleNamespace(
            stdout=ruff_stdout, stderr="", returncode=1
        ),
    )

    events = nw.ingest_ruff(tmp_path, "run-ruff")
    assert len(events) == 1
    payload = events[0]["payload"]
    expected_payload = {
        "source_type": "scanner",
        "category": "scanner",
        "risk_class": "mechanical",
        "classification": "would_fix",
        "law_or_rule": "E402",
        "line": 5,
    }
    for field, value in expected_payload.items():
        assert payload[field] == value


def test_ingest_ruff_returns_empty_on_unparseable(tmp_path, monkeypatch):
    """Non-JSON stdout from the fake ruff process yields no events."""
    monkeypatch.setattr(
        nw.subprocess,
        "run",
        lambda cmd, **kwargs: types.SimpleNamespace(
            stdout="not json", stderr="", returncode=1
        ),
    )
    events = nw.ingest_ruff(tmp_path, "run")
    assert events == []


def test_ingest_ruff_returns_empty_when_binary_missing(tmp_path, monkeypatch):
    """A FileNotFoundError from subprocess.run yields no events."""

    def _binary_not_found(cmd, **kwargs):
        raise FileNotFoundError("ruff not installed")

    monkeypatch.setattr(nw.subprocess, "run", _binary_not_found)
    events = nw.ingest_ruff(tmp_path, "run")
    assert events == []


def test_ingest_ruff_finding_key_distinct_per_violation(tmp_path, monkeypatch):
    """Violations that differ only in row, code, or column get distinct finding_keys."""

    def violation(code, row, column):
        # All variants share the message and filename; only the arguments vary.
        return {
            "code": code,
            "message": "Line too long (105 > 100)",
            "filename": "missing.py",
            "location": {"row": row, "column": column},
        }

    def ingest(items):
        # Re-patch subprocess.run so it reports exactly *items*, then ingest.
        stdout = json.dumps(items)
        monkeypatch.setattr(
            nw.subprocess,
            "run",
            lambda cmd, **kwargs: types.SimpleNamespace(
                stdout=stdout, stderr="", returncode=1
            ),
        )
        return nw.ingest_ruff(tmp_path, "run-ruff")

    scenarios = [
        (violation("E501", 12, 1), violation("E501", 88, 1)),  # rows differ
        (violation("E501", 12, 1), violation("E402", 12, 1)),  # codes differ
        (violation("E501", 12, 1), violation("E501", 12, 9)),  # columns differ
    ]
    for first, second in scenarios:
        events = ingest([first, second])
        assert len({e["finding_key"] for e in events}) == 2


# ---------------------------------------------------------------------------
# report / verify subcommands (formerly NotImplementedError stubs).
# ---------------------------------------------------------------------------
def test_report_no_longer_raises_not_implemented(tmp_path):
    """``report`` against a ledger path that was never written returns 0.

    Per the original note, this is the empty-ledger no-op path, which prints
    "no confirmed".
    """
    ledger = tmp_path / "events.jsonl"
    argv = ["report", "--ledger", str(ledger)]
    args = nw.build_parser().parse_args(argv)
    exit_code = args.func(args)
    assert exit_code == 0


def test_verify_no_longer_raises_not_implemented(tmp_path):
    """``verify`` against a ledger path that was never written returns 0 and does not raise."""
    ledger = tmp_path / "events.jsonl"
    argv = ["verify", "--repo-root", str(tmp_path), "--ledger", str(ledger)]
    args = nw.build_parser().parse_args(argv)
    exit_code = args.func(args)
    assert exit_code == 0


# ---------------------------------------------------------------------------
# End-to-end ingest into a ledger.
# ---------------------------------------------------------------------------
def test_cmd_ingest_appends_valid_jsonl(tmp_path, monkeypatch):
    """``ingest`` writes one codex_audit event and one hygiene_queue event to the ledger."""
    # Inputs: an audit dir holding a single findings file, plus a hygiene queue.
    audit_finding = {
        "severity": "HIGH",
        "category": "SSOT",
        "law_or_rule": "SSOT-1",
        "file": "recoil/pipeline/tools/consult.py",
        "line": 1,
        "title": "t",
        "evidence": "e",
        "recommendation": "r",
        "effort": "S",
        "confidence": "high",
    }
    audit_document = {
        "subsystem": "recoil/pipeline/tools",
        "head_sha": "deadbee",
        "findings": [audit_finding],
    }
    audit_dir = tmp_path / "audit"
    audit_dir.mkdir()
    findings_file = audit_dir / "findings-x-20260603-000000.json"
    findings_file.write_text(json.dumps(audit_document), encoding="utf-8")

    hygiene = tmp_path / "hygiene.md"
    hygiene.write_text(
        "## Open\n- 2026-06-01 | path/to/file.py:12 | issue | why\n", encoding="utf-8"
    )
    ledger = tmp_path / "ledger" / "events.jsonl"

    argv = [
        "ingest",
        "--repo-root", str(nw.DEFAULT_REPO_ROOT),
        "--ledger", str(ledger),
        "--audit-dir", str(audit_dir),
        "--hygiene-queue", str(hygiene),
    ]
    args = nw.build_parser().parse_args(argv)
    exit_code = args.func(args)
    assert exit_code == 0

    events = nw.load_events(ledger)
    assert len(events) == 2
    seen_sources = {event["payload"]["source_type"] for event in events}
    assert seen_sources == {"codex_audit", "hygiene_queue"}
    for event in events:
        assert event["event_id"].startswith("sha256:")
        assert event["judgment_source"] == "nightwatch_ingest"


def test_cmd_ingest_skips_missing_audit_without_crash(tmp_path):
    """``ingest`` returns 0 and records nothing when both input paths are absent."""
    ledger = tmp_path / "events.jsonl"
    missing_audit_dir = tmp_path / "nonexistent"
    missing_queue = tmp_path / "nonexistent.md"
    argv = [
        "ingest",
        "--repo-root", str(nw.DEFAULT_REPO_ROOT),
        "--ledger", str(ledger),
        "--audit-dir", str(missing_audit_dir),
        "--hygiene-queue", str(missing_queue),
    ]
    args = nw.build_parser().parse_args(argv)
    exit_code = args.func(args)
    assert exit_code == 0
    assert nw.load_events(ledger) == []


# ---------------------------------------------------------------------------
# Phase 3: Verify Gate.
# ---------------------------------------------------------------------------
def _observed(file=None, normalized_anchor="", line=7, evidence="some evidence",
              observation_id="obs-1", finding_key="fk-1", run_id="run-1",
              blocked_reason=None, category="ssot", law_or_rule=None,
              title="a finding", recommendation="do the thing"):
    """Return a minimal ``observed`` event dict for the verify/replay tests.

    The envelope fields are fixed placeholders apart from ``run_id``,
    ``finding_key`` and ``observation_id``. The payload's ``claim_fingerprint``
    is derived from title, evidence and recommendation, and a falsy
    ``blocked_reason`` is stored as an empty list.
    """
    payload = {
        "source_type": "codex_audit",
        "file": file,
        "line": line,
        "normalized_anchor": normalized_anchor,
        "title": title,
        "evidence": evidence,
        "recommendation": recommendation,
        "category": category,
        "law_or_rule": law_or_rule,
        "claim_fingerprint": nw.derive_claim_fingerprint(title, evidence, recommendation),
        "blocked_reason": blocked_reason if blocked_reason else [],
    }
    event = {
        "schema_version": 1,
        "event_ts": "2026-06-04T00:00:00Z",
        "event_type": "observed",
        "run_id": run_id,
        "repo_root": "/tmp/repo",
        "head_sha": "abc1234",
        "finding_key": finding_key,
        "observation_id": observation_id,
        "judgment_source": "nightwatch_ingest",
        "event_id": "sha256:fake",
    }
    # Attach the payload last so it stays the final key of the envelope.
    event["payload"] = payload
    return event


def _assert_prompt_grants_read_only_file_access(prompt: str):
    """Assert that *prompt* mentions read-only access and tells the reader to open the file.

    Both patterns are matched case-insensitively anywhere in the prompt.
    """
    required_patterns = (
        r"read-only",
        r"open the cited file|open the file",
    )
    for pattern in required_patterns:
        assert re.search(pattern, prompt, re.I)


def test_build_verify_prompt_grants_file_reading():
    """The verify prompt grants read-only file access and constrains only the output."""
    event = _observed(file="mod.py", normalized_anchor="class Widget:")
    replay = {"deterministic_status": "current"}
    prompt = nw.build_verify_prompt(event, replay)

    _assert_prompt_grants_read_only_file_access(prompt)
    assert "STRICT CONSTRAINTS" not in prompt
    for required_phrase in ("Constraints on your OUTPUT", "do not restrict reading"):
        assert required_phrase in prompt


def test_replay_anchor_not_found_abstains_not_stale(tmp_path):
    """anchor_not_found findings must replay to abstain, NOT stale (spec rule 4)."""
    module = tmp_path / "mod.py"
    module.write_text("class Widget:\n    x = 1\n", encoding="utf-8")
    # The anchor is a fallback evidence excerpt that does not occur in the file,
    # and the event carries blocked_reason=["anchor_not_found"] as ingest would set.
    event = _observed(
        file="mod.py",
        normalized_anchor="Refactor this thing",
        blocked_reason=["anchor_not_found"],
    )
    outcome = nw.replay_observation(event, tmp_path)
    assert outcome["deterministic_status"] == "abstain"
    assert "anchor_not_found" in outcome["reason"]


def test_verify_anchor_not_found_abstains_deterministically(tmp_path, monkeypatch):
    """An anchor_not_found finding deterministically abstains — no model call."""
    consultations = []

    def _spy_consultation(**kwargs):
        # Record every call so the test can prove none happened.
        consultations.append(kwargs)
        return "{}"

    monkeypatch.setattr(nw, "run_codex_consultation", _spy_consultation)
    module = tmp_path / "mod.py"
    module.write_text("class Widget:\n    x = 1\n", encoding="utf-8")
    event = _observed(
        file="mod.py",
        normalized_anchor="ungroundable note",
        blocked_reason=["anchor_not_found"],
    )
    verified = nw.verify_observation(
        event, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )
    payload = verified["payload"]
    assert payload["verify_status"] == "abstain"
    assert verified["judgment_source"] == "deterministic"
    assert payload["model_confidence"] is None
    assert consultations == []  # model NOT consulted


def test_ingest_ruff_path_traversal_not_read(tmp_path, monkeypatch):
    """ruff findings outside repo_root must be path_traversal-blocked, never read."""
    fake_ruff = [{
        "code": "E999", "message": "boom",
        "filename": "/etc/passwd", "location": {"row": 1},
    }]

    # Expose stdout, stderr and returncode like subprocess.CompletedProcess and
    # like the other ruff fakes in this file; a fake without ``stderr`` would
    # raise AttributeError, instead of failing an assertion, if ingest_ruff
    # ever reads it.
    completed = types.SimpleNamespace(
        stdout=json.dumps(fake_ruff), stderr="", returncode=1
    )
    monkeypatch.setattr(nw.subprocess, "run", lambda *a, **k: completed)

    events = nw.ingest_ruff(tmp_path, run_id="r1")
    assert len(events) == 1
    payload = events[0]["payload"]
    assert "path_traversal" in payload["blocked_reason"]
    # The external file's contents must NOT have been read into the anchor.
    assert "root:" not in payload["normalized_anchor"]


def test_replay_anchor_not_found_abstains_when_file_missing(tmp_path):
    """Replay abstains (not stale) for an anchor_not_found finding whose file is absent."""
    observation = _observed(
        file="gone.py",
        normalized_anchor="excerpt",
        blocked_reason=["anchor_not_found"],
    )
    outcome = nw.replay_observation(observation, tmp_path)
    assert outcome["deterministic_status"] == "abstain"


def test_replay_anchor_not_found_abstains_when_no_file(tmp_path):
    """Replay abstains for an anchor_not_found finding that has no file at all
    (it must NOT come back as 'current' and reach the model)."""
    observation = _observed(
        file=None,
        normalized_anchor="excerpt",
        blocked_reason=["anchor_not_found"],
    )
    outcome = nw.replay_observation(observation, tmp_path)
    assert outcome["deterministic_status"] == "abstain"


def test_replay_traversal_rejects_even_with_anchor_not_found(tmp_path):
    """The path-traversal reject takes precedence over the anchor_not_found abstain
    when a finding carries both blocked reasons."""
    observation = _observed(
        file="../../etc/passwd",
        normalized_anchor="root",
        blocked_reason=["path_traversal", "anchor_not_found"],
    )
    outcome = nw.replay_observation(observation, tmp_path)
    assert outcome["deterministic_status"] == "rejected"


def test_ingest_ruff_relative_path_resolved_under_repo(tmp_path, monkeypatch):
    """A relative ruff filename is resolved beneath repo_root and is not
    treated as path traversal, whatever the process cwd is."""
    package_dir = tmp_path / "pkg"
    package_dir.mkdir()
    (package_dir / "mod.py").write_text("x = 1\n", encoding="utf-8")
    ruff_report = json.dumps([
        {
            "code": "F401",
            "message": "unused",
            "filename": "pkg/mod.py",
            "location": {"row": 1},
        }
    ])

    def _fake_run(*args, **kwargs):
        # Stand-in for subprocess.run: non-zero exit plus the canned JSON report.
        return types.SimpleNamespace(returncode=1, stdout=ruff_report)

    monkeypatch.setattr(nw.subprocess, "run", _fake_run)

    emitted = nw.ingest_ruff(tmp_path, run_id="r1")

    assert len(emitted) == 1
    finding = emitted[0]["payload"]
    assert "path_traversal" not in finding["blocked_reason"]
    assert finding["file"] == "pkg/mod.py"


# --- replay_observation -----------------------------------------------------
def test_replay_marks_missing_file_stale(tmp_path):
    """Replaying a finding whose file is absent yields stale / "file missing"."""
    observation = _observed(file="some/nonexistent.py", normalized_anchor="def foo():")
    outcome = nw.replay_observation(observation, tmp_path)
    assert outcome["deterministic_status"] == "stale"
    assert outcome["reason"] == "file missing"


def test_replay_line_is_advisory(tmp_path):
    """The recorded line number does not affect replay: both an accurate and a
    wildly wrong line still replay as current when the anchor is present."""
    (tmp_path / "mod.py").write_text(
        "def resolve(self):\n    return self.path\n", encoding="utf-8"
    )
    for recorded_line in (1, 999):
        observation = _observed(
            file="mod.py", normalized_anchor="def resolve(self):", line=recorded_line
        )
        outcome = nw.replay_observation(observation, tmp_path)
        assert outcome["deterministic_status"] == "current"


def test_replay_rejects_path_traversal(tmp_path):
    """A file path escaping the repo root is rejected with "path outside repo"."""
    escaping = _observed(file="../../etc/passwd", normalized_anchor="root")
    outcome = nw.replay_observation(escaping, tmp_path)
    assert outcome["deterministic_status"] == "rejected"
    assert outcome["reason"] == "path outside repo"


def test_replay_marks_anchor_drift_stale(tmp_path):
    """An anchor that no longer appears in an existing file replays as stale / "anchor drift"."""
    (tmp_path / "mod.py").write_text(
        "def resolve(self):\n    return self.path\n", encoding="utf-8"
    )
    drifted = _observed(file="mod.py", normalized_anchor="def vanished(self):")
    outcome = nw.replay_observation(drifted, tmp_path)
    assert outcome["deterministic_status"] == "stale"
    assert outcome["reason"] == "anchor drift"


def test_replay_current_when_anchor_present(tmp_path):
    """An anchor found in the file replays as current with anchor_present set to True."""
    (tmp_path / "mod.py").write_text("class Widget:\n    x = 1\n", encoding="utf-8")
    grounded = _observed(file="mod.py", normalized_anchor="class Widget:")
    outcome = nw.replay_observation(grounded, tmp_path)
    assert outcome["deterministic_status"] == "current"
    assert outcome["anchor_present"] is True


def test_replay_no_file_abstains(tmp_path):
    """A finding with no file and an empty anchor replays as abstain."""
    # Without a file there is nothing to ground deterministically, so the
    # finding must abstain instead of being routed to the model (real ingest
    # always flags such findings anyway).
    fileless = _observed(file=None, normalized_anchor="")
    outcome = nw.replay_observation(fileless, tmp_path)
    assert outcome["deterministic_status"] == "abstain"


# --- check_duplicate_pr -----------------------------------------------------
def test_check_duplicate_pr_unknown_when_gh_missing(tmp_path, monkeypatch):
    """check_duplicate_pr reports "unknown" when shutil.which finds no executable."""
    import shutil

    def _which_finds_nothing(name):
        return None

    monkeypatch.setattr(shutil, "which", _which_finds_nothing)
    observation = _observed(file="mod.py")
    status = nw.check_duplicate_pr(observation, tmp_path)
    assert status == "unknown"


def test_check_duplicate_pr_unknown_on_subprocess_error(tmp_path, monkeypatch):
    """check_duplicate_pr reports "unknown" when subprocess.run raises FileNotFoundError."""
    import shutil

    def _which_finds_gh(name):
        return "/usr/bin/gh"

    def _run_explodes(*args, **kwargs):
        # Simulates the executable disappearing between lookup and launch.
        raise FileNotFoundError("gh vanished")

    monkeypatch.setattr(shutil, "which", _which_finds_gh)
    monkeypatch.setattr(nw.subprocess, "run", _run_explodes)

    observation = _observed(file="mod.py")
    status = nw.check_duplicate_pr(observation, tmp_path)
    assert status == "unknown"


# --- parse_model_verdict ----------------------------------------------------
def test_parse_model_verdict_confirmed():
    """A well-formed confirmed verdict parses into the matching three-field dict."""
    raw = '{"verdict": "confirmed", "reason": "it is real", "confidence": "high"}'
    expected = {"verdict": "confirmed", "reason": "it is real", "confidence": "high"}
    assert nw.parse_model_verdict(raw) == expected


def test_parse_model_verdict_rejected():
    """A well-formed rejected verdict keeps its "rejected" value."""
    raw = '{"verdict": "rejected", "reason": "already fixed", "confidence": "medium"}'
    parsed = nw.parse_model_verdict(raw)
    assert parsed["verdict"] == "rejected"


def test_parse_model_verdict_abstain():
    """A well-formed abstain verdict keeps its "abstain" value."""
    raw = '{"verdict": "abstain", "reason": "insufficient", "confidence": "low"}'
    parsed = nw.parse_model_verdict(raw)
    assert parsed["verdict"] == "abstain"


def test_parse_model_verdict_extracts_from_prose():
    """A JSON verdict wrapped in surrounding prose lines is still recovered."""
    raw = 'Here is my answer:\n{"verdict": "confirmed", "reason": "x", "confidence": "low"}\nDone.'
    parsed = nw.parse_model_verdict(raw)
    assert parsed["verdict"] == "confirmed"


def test_parse_model_verdict_malformed_not_json_abstains():
    """Non-JSON model output degrades to an abstain verdict with low confidence."""
    parsed = nw.parse_model_verdict("not json at all")
    assert parsed["verdict"] == "abstain"
    assert parsed["confidence"] == "low"


def test_parse_model_verdict_bad_enum_abstains():
    """A verdict value outside the accepted set ("maybe") degrades to abstain."""
    raw = '{"verdict": "maybe", "reason": "x", "confidence": "high"}'
    parsed = nw.parse_model_verdict(raw)
    assert parsed["verdict"] == "abstain"


# --- verify_observation: deterministic outcomes (model NOT called) ----------
def test_verify_stale_does_not_call_model(tmp_path, monkeypatch):
    """A finding whose file is missing is verified as stale deterministically,
    with no model confidence and no model invocation."""
    model_calls = []

    def _record_model_call(**kwargs):
        model_calls.append(kwargs)
        return "{}"

    monkeypatch.setattr(nw, "run_codex_consultation", _record_model_call)
    observation = _observed(file="missing.py", normalized_anchor="def x():")

    result = nw.verify_observation(
        observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )

    payload = result["payload"]
    assert payload["verify_status"] == "stale"
    assert payload["model_confidence"] is None
    assert result["judgment_source"] == "deterministic"
    assert model_calls == []


def test_verify_rejected_path_traversal_does_not_call_model(tmp_path, monkeypatch):
    """A path-traversal finding is verified as rejected without invoking the model."""
    model_calls = []

    def _record_model_call(**kwargs):
        model_calls.append(kwargs)
        return "{}"

    monkeypatch.setattr(nw, "run_codex_consultation", _record_model_call)
    observation = _observed(file="../../etc/passwd", normalized_anchor="root")

    result = nw.verify_observation(
        observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )

    assert result["payload"]["verify_status"] == "rejected"
    assert model_calls == []


def test_verify_duplicate_does_not_call_model(tmp_path, monkeypatch):
    """When the duplicate check reports an open PR, the finding is verified as
    duplicate and the model is never invoked."""
    model_calls = []

    def _record_model_call(**kwargs):
        model_calls.append(kwargs)
        return "{}"

    def _always_duplicate(ev, root):
        return "open_pr_duplicate"

    monkeypatch.setattr(nw, "run_codex_consultation", _record_model_call)
    monkeypatch.setattr(nw, "check_duplicate_pr", _always_duplicate)
    (tmp_path / "mod.py").write_text("class Widget:\n    x = 1\n", encoding="utf-8")
    observation = _observed(file="mod.py", normalized_anchor="class Widget:")

    result = nw.verify_observation(
        observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )

    assert result["payload"]["verify_status"] == "duplicate"
    assert model_calls == []


# --- verify_observation: model path -----------------------------------------
def _current_repo(tmp_path, anchor="class Widget:"):
    """Write a mod.py under tmp_path that starts with ``anchor`` and return an
    observed event pointing at it (so replay resolves to current)."""
    module_text = f"{anchor}\n    x = 1\n"
    (tmp_path / "mod.py").write_text(module_text, encoding="utf-8")
    return _observed(file="mod.py", normalized_anchor=anchor)


def _fake_model(monkeypatch, json_out):
    """Patch the model consultation to return ``json_out`` and pin the duplicate
    check to "none"; return the dict that collects the model call's kwargs."""
    seen_kwargs = {}

    def _canned_consultation(**kwargs):
        seen_kwargs.update(kwargs)
        return json_out

    def _no_duplicate(ev, root):
        # Keeps the duplicate check deterministic instead of shelling out to gh.
        return "none"

    monkeypatch.setattr(nw, "run_codex_consultation", _canned_consultation)
    monkeypatch.setattr(nw, "check_duplicate_pr", _no_duplicate)
    return seen_kwargs


def _linear_observed(tmp_path, *, category="bug", finding_key=None):
    """Create mod.py under tmp_path and return an observed event for it.

    The event's finding_key is ``finding_key`` when given (truthy); otherwise
    it is derived from the category, file, and effective claim signature.
    """
    anchor = "class Widget:"
    evidence = "the `class Widget:` implementation accepts invalid state"
    claim_signature = lq.effective_claim_signature(evidence, anchor)
    derived_key = nw.derive_finding_key(category, "mod.py", claim_signature)
    (tmp_path / "mod.py").write_text(f"{anchor}\n    x = 1\n", encoding="utf-8")
    fields = dict(
        file="mod.py",
        normalized_anchor=anchor,
        evidence=evidence,
        finding_key=finding_key or derived_key,
        category=category,
        law_or_rule="BUG-1",
        title="Widget does not validate",
        recommendation="validate state before use",
    )
    return _observed(**fields)


def _run_verify(tmp_path, ledger, extra_args=None):
    """Run the ``verify`` subcommand against ``tmp_path``/``ledger`` (plus any
    ``extra_args``) and return the handler's return value."""
    argv = [
        "verify",
        "--repo-root", str(tmp_path),
        "--ledger", str(ledger),
        *(extra_args or ()),
    ]
    namespace = nw.build_parser().parse_args(argv)
    return namespace.func(namespace)


def test_cmd_verify_enqueue_linear_confirmed_writes_one_reconciled_event(tmp_path, monkeypatch):
    """With --enqueue-linear, a confirmed finding produces exactly one inbox
    event whose finding_key agrees with both nightwatch's and linear_queue's
    derivation."""
    ledger = tmp_path / "events.jsonl"
    observed = _linear_observed(tmp_path)
    nw.append_event(ledger, observed)
    _fake_model(
        monkeypatch,
        '{"verdict": "confirmed", "reason": "still real", "confidence": "high"}',
    )
    inbox = []

    def _capture_inbox(ev):
        inbox.append(ev)
        return tmp_path / "inbox.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)

    exit_code = _run_verify(tmp_path, ledger, ["--enqueue-linear"])
    assert exit_code == 0

    assert len(inbox) == 1
    enqueued = inbox[0]
    payload = observed["payload"]
    nightwatch_key = nw.derive_finding_key(
        payload["category"],
        payload["file"],
        lq.effective_claim_signature(payload["evidence"], payload["normalized_anchor"]),
    )
    assert enqueued["source_type"] == "nightwatch"
    assert enqueued["finding_key"] == nightwatch_key
    queue_key = lq.derive_finding_key_code(
        payload["category"],
        payload["file"],
        lq.effective_claim_signature(payload["evidence"], payload["normalized_anchor"]),
    )
    assert enqueued["finding_key"] == queue_key


def test_cmd_verify_enqueue_linear_rejected_writes_no_event(tmp_path, monkeypatch):
    """With --enqueue-linear, a path-traversal finding writes nothing to the inbox."""
    ledger = tmp_path / "events.jsonl"
    traversal = _observed(
        file="../../etc/passwd", normalized_anchor="root", category="bug"
    )
    nw.append_event(ledger, traversal)
    inbox = []

    def _capture_inbox(ev):
        inbox.append(ev)
        return tmp_path / "inbox.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)

    exit_code = _run_verify(tmp_path, ledger, ["--enqueue-linear"])
    assert exit_code == 0

    assert inbox == []


def _confirmed_event(finding_key="fk-1"):
    return {
        "schema_version": 1,
        "event_type": "verified",
        "finding_key": finding_key,
        "observation_id": "obs-1",
        "payload": {"verify_status": "confirmed"},
    }


def test_enqueue_subject_findings_dedup_across_rephrasing(tmp_path, monkeypatch):
    """Two file-less findings on the same subject that cite the same backticked
    token enqueue under one finding_key despite different title, evidence
    wording, and recommendation."""
    enqueued = []

    def _capture_inbox(ev):
        enqueued.append(ev)
        return tmp_path / "event.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)

    original = _observed(
        file=None,
        category="bug",
        title="Old title",
        evidence="The `repo_gate` check fails.",
        recommendation="Fix A",
    )
    rephrased = _observed(
        file=None,
        category="bug",
        title="New title",
        evidence="Different prose still cites `repo_gate`.",
        recommendation="Fix B",
        observation_id="obs-2",
    )
    for observation in (original, rephrased):
        observation["payload"]["subject_kind"] = "repo"
        observation["payload"]["subject_id"] = "recoil"

    for observation in (original, rephrased):
        assert nw.enqueue_linear_confirmed_survivor(
            verified=_confirmed_event(), observed=observation, run_id="run"
        )

    first_key = enqueued[0]["finding_key"]
    second_key = enqueued[1]["finding_key"]
    assert first_key == second_key


def test_enqueue_subject_findings_distinct_for_distinct_claims(tmp_path, monkeypatch):
    """Two file-less findings on the same subject that cite different backticked
    tokens enqueue under different finding_keys."""
    enqueued = []

    def _capture_inbox(ev):
        enqueued.append(ev)
        return tmp_path / "event.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)

    repo_gate = _observed(file=None, category="bug", evidence="The `repo_gate` check fails.")
    audit_gate = _observed(
        file=None,
        category="bug",
        evidence="The `audit_gate` check fails.",
        observation_id="obs-2",
    )
    for observation in (repo_gate, audit_gate):
        observation["payload"]["subject_kind"] = "repo"
        observation["payload"]["subject_id"] = "recoil"

    for observation in (repo_gate, audit_gate):
        assert nw.enqueue_linear_confirmed_survivor(
            verified=_confirmed_event(), observed=observation, run_id="run"
        )

    first_key = enqueued[0]["finding_key"]
    second_key = enqueued[1]["finding_key"]
    assert first_key != second_key


def test_enqueue_no_file_no_subject_still_enqueues(tmp_path, monkeypatch):
    """A finding with neither a file nor a subject is still enqueued, under a
    finding_key that starts with "sha256:"."""
    enqueued = []

    def _capture_inbox(ev):
        enqueued.append(ev)
        return tmp_path / "event.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)
    bare = _observed(
        file=None,
        category="bug",
        evidence="The `repo_gate` check fails.",
        normalized_anchor="repo_gate",
    )

    accepted = nw.enqueue_linear_confirmed_survivor(
        verified=_confirmed_event(), observed=bare, run_id="run"
    )

    assert accepted
    assert enqueued[0]["finding_key"].startswith("sha256:")


def test_enqueue_scanner_survivors_distinct_per_violation(tmp_path, monkeypatch):
    """Two scanner findings that differ only in their pre-derived finding_key
    are enqueued under their own keys, not merged."""
    enqueued = []

    def _capture_inbox(ev):
        enqueued.append(ev)
        return tmp_path / "event.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)

    # Same rule and message; only the row embedded in the signature differs.
    key_row_12 = nw.derive_finding_key(
        "scanner", "mod.py", "E501|12|1|Line too long (105 > 100)|anchor"
    )
    key_row_88 = nw.derive_finding_key(
        "scanner", "mod.py", "E501|88|1|Line too long (105 > 100)|anchor"
    )
    shared_fields = dict(
        file="mod.py",
        category="scanner",
        title="ruff E501",
        evidence="Line too long (105 > 100)",
        recommendation="",
    )
    violation_a = _observed(**shared_fields, finding_key=key_row_12)
    violation_a["payload"]["source_type"] = "scanner"
    violation_b = _observed(**shared_fields, finding_key=key_row_88, observation_id="obs-2")
    violation_b["payload"]["source_type"] = "scanner"

    for key, violation in ((key_row_12, violation_a), (key_row_88, violation_b)):
        assert nw.enqueue_linear_confirmed_survivor(
            verified=_confirmed_event(key), observed=violation, run_id="run"
        )

    emitted_keys = [event["finding_key"] for event in enqueued[:2]]
    assert enqueued[0]["finding_key"] == key_row_12
    assert enqueued[1]["finding_key"] == key_row_88
    assert emitted_keys[0] != emitted_keys[1]


def test_cmd_verify_default_does_not_enqueue_linear(tmp_path, monkeypatch):
    """Without --enqueue-linear, even a confirmed finding writes nothing to the inbox."""
    ledger = tmp_path / "events.jsonl"
    nw.append_event(ledger, _linear_observed(tmp_path))
    _fake_model(
        monkeypatch,
        '{"verdict": "confirmed", "reason": "still real", "confidence": "high"}',
    )
    inbox = []

    def _capture_inbox(ev):
        inbox.append(ev)
        return tmp_path / "inbox.json"

    monkeypatch.setattr(lq, "atomic_write_inbox", _capture_inbox)

    exit_code = _run_verify(tmp_path, ledger)
    assert exit_code == 0

    assert inbox == []


def test_verify_model_confirmed(tmp_path, monkeypatch):
    """A confirmed model verdict is recorded as confirmed / codex_consult, and
    the model is called with the checked-in schema, high effort, a read-only
    prompt, and the repo as cwd."""
    model_kwargs = _fake_model(
        monkeypatch,
        '{"verdict": "confirmed", "reason": "real", "confidence": "high"}',
    )
    observation = _current_repo(tmp_path)

    result = nw.verify_observation(
        observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )

    payload = result["payload"]
    assert payload["verify_status"] == "confirmed"
    assert result["judgment_source"] == "codex_consult"
    assert payload["model_confidence"] == "high"
    # Uses the checked-in schema path; no temp schema file created.
    assert model_kwargs["output_schema"] == str(nw.DEFAULT_VERIFY_SCHEMA)
    assert model_kwargs["effort"] == "high"
    _assert_prompt_grants_read_only_file_access(model_kwargs["prompt"])
    assert model_kwargs["cwd"] == tmp_path


def test_verify_model_rejected(tmp_path, monkeypatch):
    """A rejected model verdict is recorded as verify_status "rejected"."""
    _fake_model(
        monkeypatch,
        '{"verdict": "rejected", "reason": "fixed", "confidence": "medium"}',
    )
    observation = _current_repo(tmp_path)
    result = nw.verify_observation(
        observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )
    status = result["payload"]["verify_status"]
    assert status == "rejected"


def test_verify_model_abstain(tmp_path, monkeypatch):
    """An abstain model verdict is recorded as verify_status "abstain"."""
    _fake_model(
        monkeypatch,
        '{"verdict": "abstain", "reason": "unsure", "confidence": "low"}',
    )
    observation = _current_repo(tmp_path)
    result = nw.verify_observation(
        observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
    )
    status = result["payload"]["verify_status"]
    assert status == "abstain"


def test_verify_model_malformed_abstains_not_confirmed(tmp_path, monkeypatch):
    """Unparseable model output and an out-of-enum verdict both end in abstain."""
    observation = _current_repo(tmp_path)
    malformed_outputs = ("not json", '{"verdict": "maybe"}')
    for raw_output in malformed_outputs:
        _fake_model(monkeypatch, raw_output)
        result = nw.verify_observation(
            observation, tmp_path, schema_path=nw.DEFAULT_VERIFY_SCHEMA, run_id="vrun"
        )
        assert result["payload"]["verify_status"] == "abstain"


# --- load_unverified_observations -------------------------------------------
def test_load_unverified_skips_already_verified():
    """An observation that already has a verified event is left out of the pending set."""
    already_done = _observed(observation_id="o1", finding_key="f1")
    still_pending = _observed(observation_id="o2", finding_key="f2")
    verification = {
        "event_type": "verified",
        "observation_id": "o1",
        "finding_key": "f1",
    }
    pending = nw.load_unverified_observations(
        [already_done, still_pending, verification], limit=25
    )
    assert [event["observation_id"] for event in pending] == ["o2"]


def test_load_unverified_honors_limit():
    """With five unverified observations and limit=2, exactly two are returned."""
    observations = []
    for index in range(5):
        observations.append(
            _observed(observation_id=f"o{index}", finding_key=f"f{index}")
        )
    pending = nw.load_unverified_observations(observations, limit=2)
    assert len(pending) == 2


# --- cmd_verify: appends without modifying observed events ------------------
def test_cmd_verify_appends_without_modifying_observed(tmp_path, monkeypatch):
    """`verify` appends one confirmed verified event per observation, leaves the
    observed ledger lines untouched, and adds nothing on a second run."""
    (tmp_path / "mod.py").write_text("class Widget:\n    x = 1\n", encoding="utf-8")

    def _always_confirm(**kwargs):
        return '{"verdict": "confirmed", "reason": "real", "confidence": "high"}'

    def _no_duplicate(ev, root):
        return "none"

    monkeypatch.setattr(nw, "run_codex_consultation", _always_confirm)
    monkeypatch.setattr(nw, "check_duplicate_pr", _no_duplicate)

    ledger = tmp_path / "events.jsonl"
    for index in range(3):
        nw.append_event(
            ledger,
            _observed(
                file="mod.py",
                normalized_anchor="class Widget:",
                observation_id=f"o{index}",
                finding_key=f"f{index}",
            ),
        )

    def _ledger_lines():
        return ledger.read_text(encoding="utf-8").splitlines()

    seeded_lines = _ledger_lines()
    assert len(seeded_lines) == 3

    assert _run_verify(tmp_path, ledger) == 0

    lines_after_verify = _ledger_lines()
    # Original observed lines unchanged.
    assert lines_after_verify[:3] == seeded_lines
    # Three verified events appended.
    assert len(lines_after_verify) == 6
    for event in nw.load_events(ledger)[3:]:
        assert event["event_type"] == "verified"
        assert event["payload"]["verify_status"] == "confirmed"

    # Re-running verify is idempotent — all are now verified.
    assert _run_verify(tmp_path, ledger) == 0
    assert len(_ledger_lines()) == 6


# ---------------------------------------------------------------------------
# Phase 4: Report Projection.
# ---------------------------------------------------------------------------
def _observed_full(finding_key, observation_id, event_ts, *, title="t", evidence="e",
                   recommendation="r", risk_class="escalation", source_type="codex_audit",
                   file="recoil/pipeline/tools/consult.py", line=1, category="ssot",
                   normalized_anchor="anchor"):
    """Observed event with the full report-relevant payload + a chosen event_ts."""
    return {
        "schema_version": 1,
        "event_ts": event_ts,
        "event_type": "observed",
        "run_id": "run-1",
        "repo_root": "/tmp/repo",
        "head_sha": "abc1234",
        "finding_key": finding_key,
        "observation_id": observation_id,
        "judgment_source": "nightwatch_ingest",
        "event_id": "sha256:fake",
        "payload": {
            "source_type": source_type,
            "file": file,
            "line": line,
            "category": category,
            "risk_class": risk_class,
            "classification": "would_escalate",
            "law_or_rule": None,
            "normalized_anchor": normalized_anchor,
            "claim_fingerprint": "sha256:cf",
            "title": title,
            "evidence": evidence,
            "recommendation": recommendation,
        },
    }


def _verified_full(finding_key, observation_id, event_ts, verify_status,
                   verify_reason="because", model_confidence="high"):
    return {
        "schema_version": 1,
        "event_ts": event_ts,
        "event_type": "verified",
        "run_id": "vrun-1",
        "repo_root": "/tmp/repo",
        "head_sha": "abc1234",
        "finding_key": finding_key,
        "observation_id": observation_id,
        "judgment_source": "codex_consult",
        "event_id": "sha256:fakev",
        "payload": {
            "verify_status": verify_status,
            "verify_reason": verify_reason,
            "deterministic_status": "current",
            "duplicate_status": "none",
            "model_confidence": model_confidence,
        },
    }


def test_report_projection_uses_latest_observed_title():
    """When a finding is observed twice, the projection carries the title and
    observation_id of the later observation."""
    older = _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="OLD TITLE")
    newer = _observed_full("fk", "o2", "2026-06-02T00:00:00Z", title="NEW TITLE")
    verdict = _verified_full("fk", "o2", "2026-06-02T01:00:00Z", "confirmed")

    projected = nw.project_findings([older, newer, verdict])

    assert len(projected) == 1
    only = projected[0]
    assert only["title"] == "NEW TITLE"
    assert only["observation_id"] == "o2"


def test_report_uses_latest_verified_status():
    """With two verified events for one observation, the later status wins."""
    observation = _observed_full("fk", "o1", "2026-06-01T00:00:00Z")
    earlier_verdict = _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "abstain")
    later_verdict = _verified_full("fk", "o1", "2026-06-01T02:00:00Z", "confirmed")

    projected = nw.project_findings([observation, earlier_verdict, later_verdict])

    assert projected[0]["verify_status"] == "confirmed"


def test_report_surfaces_confirmed_only_by_default():
    """The default report shows the confirmed finding's title and omits the rejected one's."""
    ledger_events = [
        _observed_full("fk-ok", "o1", "2026-06-01T00:00:00Z", title="GOOD ONE"),
        _verified_full("fk-ok", "o1", "2026-06-01T01:00:00Z", "confirmed"),
        _observed_full("fk-no", "o2", "2026-06-01T00:00:00Z", title="REJECTED ONE"),
        _verified_full("fk-no", "o2", "2026-06-01T01:00:00Z", "rejected"),
    ]
    ledger_path = pathlib.Path("/tmp/ledger.jsonl")

    report = nw.render_report(ledger_events, ledger_path)

    assert "GOOD ONE" in report
    assert "REJECTED ONE" not in report


@pytest.mark.parametrize("status", ["rejected", "abstain", "stale", "duplicate"])
def test_report_suppresses_non_confirmed_by_default(status):
    """Each non-confirmed status keeps the finding out of the default report,
    which then states that there are no confirmed findings."""
    observation = _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="HIDE ME")
    verdict = _verified_full("fk", "o1", "2026-06-01T01:00:00Z", status)
    ledger_path = pathlib.Path("/tmp/ledger.jsonl")

    report = nw.render_report([observation, verdict], ledger_path)

    assert "HIDE ME" not in report
    assert "No confirmed findings." in report


def test_report_includes_suppressed_under_diagnostics():
    """An abstained finding is hidden by default but listed, under a
    Diagnostics heading, when include_diagnostics=True."""
    ledger_events = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="DIAG ME"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "abstain"),
    ]
    ledger_path = pathlib.Path("/tmp/ledger.jsonl")

    plain_report = nw.render_report(ledger_events, ledger_path)
    assert "DIAG ME" not in plain_report

    diagnostic_report = nw.render_report(
        ledger_events, pathlib.Path("/tmp/ledger.jsonl"), include_diagnostics=True
    )
    assert "## Diagnostics" in diagnostic_report
    assert "DIAG ME" in diagnostic_report


def test_report_sorts_escalations_before_mechanical():
    """An escalation finding appears earlier in the report than a mechanical
    one, even though the mechanical one comes first in the ledger."""
    mechanical = [
        _observed_full(
            "fk-mech", "o1", "2026-06-01T00:00:00Z",
            title="MECH FINDING", risk_class="mechanical",
        ),
        _verified_full("fk-mech", "o1", "2026-06-01T01:00:00Z", "confirmed"),
    ]
    escalation = [
        _observed_full(
            "fk-esc", "o2", "2026-06-01T00:00:00Z",
            title="ESC FINDING", risk_class="escalation",
        ),
        _verified_full("fk-esc", "o2", "2026-06-01T01:00:00Z", "confirmed"),
    ]

    report = nw.render_report(mechanical + escalation, pathlib.Path("/tmp/ledger.jsonl"))

    escalation_at = report.index("ESC FINDING")
    mechanical_at = report.index("MECH FINDING")
    assert escalation_at < mechanical_at


def test_report_prints_observation_count_and_seen():
    """observation_count, first_seen and last_seen are projected from all
    observations of a finding and printed in the rendered report."""
    first_ts = "2026-06-01T00:00:00Z"
    last_ts = "2026-06-03T00:00:00Z"
    ledger_events = [
        _observed_full("fk", "o1", first_ts),
        _observed_full("fk", "o2", last_ts),
        _verified_full("fk", "o2", "2026-06-03T01:00:00Z", "confirmed"),
    ]

    projection = nw.project_findings(ledger_events)[0]
    assert projection["observation_count"] == 2
    assert projection["first_seen"] == "2026-06-01T00:00:00Z"
    assert projection["last_seen"] == "2026-06-03T00:00:00Z"

    report = nw.render_report(ledger_events, pathlib.Path("/tmp/ledger.jsonl"))
    assert "observation_count: 2" in report
    assert "first_seen: 2026-06-01T00:00:00Z" in report
    assert "last_seen: 2026-06-03T00:00:00Z" in report


def test_project_findings_flags_collapse_with_member_claims():
    """A finding_key shared by two observations projects with a distinct count
    of 2 and both member titles; a singleton key projects with a count of 1."""
    ledger_events = [
        _observed_full("fk-collapse", "o1", "2026-06-01T00:00:00Z", title="First claim"),
        _observed_full("fk-collapse", "o2", "2026-06-02T00:00:00Z", title="Second claim"),
        _observed_full("fk-single", "o3", "2026-06-03T00:00:00Z", title="Single claim"),
    ]

    projections = {}
    for projection in nw.project_findings(ledger_events):
        projections[projection["finding_key"]] = projection

    merged = projections["fk-collapse"]
    members = merged["collapsed_observations"]
    assert merged["distinct_observation_count"] == 2
    assert len(members) == 2
    member_titles = {member["title"] for member in members}
    assert member_titles == {"First claim", "Second claim"}
    assert projections["fk-single"]["distinct_observation_count"] == 1


def test_render_report_surfaces_possible_collapse_distinct_anchor():
    """Two observations sharing a key but with different anchors are listed in
    the Possible collapse section, with key, anchors, and titles."""
    alpha = _observed_full(
        "fk-collapse", "o1", "2026-06-01T00:00:00Z",
        title="First claim", normalized_anchor="def alpha",
    )
    beta = _observed_full(
        "fk-collapse", "o2", "2026-06-02T00:00:00Z",
        title="Second claim", normalized_anchor="def beta",
    )

    report = nw.render_report([alpha, beta], pathlib.Path("/tmp/ledger.jsonl"))

    expected_fragments = (
        "## Possible collapse",
        "fk-collapse",
        "def alpha",
        "def beta",
        "First claim",
        "Second claim",
    )
    for fragment in expected_fragments:
        assert fragment in report


def test_render_report_surfaces_possible_collapse_same_anchor_different_claim():
    """Two observations sharing both key and anchor but differing in title and
    evidence are still listed in the Possible collapse section."""
    first = _observed_full(
        "fk-collapse", "o1", "2026-06-01T00:00:00Z",
        title="First same-anchor claim",
        evidence="first evidence",
        normalized_anchor="same anchor",
    )
    second = _observed_full(
        "fk-collapse", "o2", "2026-06-02T00:00:00Z",
        title="Second same-anchor claim",
        evidence="second evidence",
        normalized_anchor="same anchor",
    )

    report = nw.render_report([first, second], pathlib.Path("/tmp/ledger.jsonl"))

    expected_fragments = (
        "## Possible collapse",
        "First same-anchor claim",
        "Second same-anchor claim",
        "first evidence",
        "second evidence",
    )
    for fragment in expected_fragments:
        assert fragment in report


def test_render_report_possible_collapse_section_present_when_none():
    """With a single observation, the Possible collapse heading is still
    rendered, followed by the "No possible collapses." placeholder."""
    lone_observation = _observed_full("fk-single", "o1", "2026-06-01T00:00:00Z")
    ledger_path = pathlib.Path("/tmp/ledger.jsonl")

    report = nw.render_report([lone_observation], ledger_path)

    assert "## Possible collapse" in report
    assert "No possible collapses." in report


def test_render_report_surfaces_possible_collapse_with_diagnostics_on():
    """The Possible collapse section, with member titles and evidence, is
    still rendered when include_diagnostics=True."""
    first = _observed_full(
        "fk-collapse", "o1", "2026-06-01T00:00:00Z",
        title="First diagnostic claim",
        evidence="first diagnostic evidence",
    )
    second = _observed_full(
        "fk-collapse", "o2", "2026-06-02T00:00:00Z",
        title="Second diagnostic claim",
        evidence="second diagnostic evidence",
    )

    report = nw.render_report(
        [first, second], pathlib.Path("/tmp/ledger.jsonl"), include_diagnostics=True
    )

    expected_fragments = (
        "## Possible collapse",
        "fk-collapse",
        "First diagnostic claim",
        "Second diagnostic claim",
        "first diagnostic evidence",
        "second diagnostic evidence",
    )
    for fragment in expected_fragments:
        assert fragment in report


def test_report_prints_adjudication_append_commands():
    """The report contains printf-append commands targeting the shell-quoted
    ledger path, one compact-JSON payload per adjudication choice."""
    ledger_path = pathlib.Path("/tmp/some ledger.jsonl")
    ledger_events = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "confirmed"),
    ]

    report = nw.render_report(ledger_events, ledger_path)

    assert "printf '%s\\n'" in report
    assert ">>" in report
    # Ledger path appears (shlex-quoted because it has a space).
    quoted_ledger = shlex.quote(str(ledger_path))
    assert quoted_ledger in report
    choices = ("agree", "wrong-classification", "false-positive", "ignore-for-now")
    for choice in choices:
        assert f'"human_adjudication":"{choice}"' in report
    # Compact JSON, no spaces after colons.
    assert '"event_type":"human_adjudicated"' in report


def test_format_adjudication_command_shlex_quotes():
    """The adjudication command is a single-line printf append whose ledger
    path is shell-quoted and whose JSON payload is compact."""
    ledger_path = pathlib.Path("/tmp/odd dir/it's a ledger.jsonl")
    target = {"finding_key": "fk-1", "observation_id": "o-1"}

    command = nw.format_adjudication_command(ledger_path, target, "agree")

    # Single printf ... >> ... line.
    assert command.startswith("printf '%s\\n' ")
    assert " >> " in command
    assert "\n" not in command
    # Ledger path is shlex-quoted (has space + apostrophe).
    quoted_ledger = shlex.quote(str(ledger_path))
    assert quoted_ledger in command
    # Embedded JSON is compact.
    for fragment in ('"event_type":"human_adjudicated"', '"human_adjudication":"agree"'):
        assert fragment in command


def test_render_report_does_not_write_or_execute(tmp_path):
    """Rendering a report leaves a pre-seeded ledger file byte-for-byte intact."""
    ledger_file = tmp_path / "events.jsonl"
    seeded = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "confirmed"),
    ]
    # Seed the ledger on disk so there is something that could be mutated.
    for event in seeded:
        nw.append_event(ledger_file, event)
    snapshot = ledger_file.read_bytes()

    nw.render_report(seeded, ledger_file)

    # Same bytes afterwards => render_report did not touch the ledger.
    assert ledger_file.read_bytes() == snapshot


def test_cmd_report_prints_and_writes_nothing(tmp_path, capsys):
    """The `report` subcommand prints to stdout and leaves tmp_path unchanged."""

    def dir_listing():
        # Sorted entry names of tmp_path, used to detect newly created files.
        return sorted(entry.name for entry in tmp_path.iterdir())

    ledger_file = tmp_path / "events.jsonl"
    seeded = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="REPORTED"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "confirmed"),
    ]
    for event in seeded:
        nw.append_event(ledger_file, event)
    ledger_snapshot = ledger_file.read_bytes()
    listing_snapshot = dir_listing()

    # Drive the command through the real argument parser.
    namespace = nw.build_parser().parse_args(
        ["report", "--ledger", str(ledger_file)]
    )
    exit_code = namespace.func(namespace)
    assert exit_code == 0

    stdout = capsys.readouterr().out
    assert "REPORTED" in stdout
    assert "# Nightwatch Report" in stdout

    # Read-only command: identical ledger bytes and no extra directory entries.
    assert ledger_file.read_bytes() == ledger_snapshot
    assert dir_listing() == listing_snapshot


def test_cmd_report_include_diagnostics_flag(tmp_path, capsys):
    """`report --include-diagnostics` prints the Diagnostics section.

    The only finding seeded here is verified as "rejected"; its title must
    appear in the output when the flag is given.
    """
    ledger_file = tmp_path / "events.jsonl"
    seeded = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="DIAG ONLY"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "rejected"),
    ]
    for event in seeded:
        nw.append_event(ledger_file, event)

    argv = ["report", "--ledger", str(ledger_file), "--include-diagnostics"]
    namespace = nw.build_parser().parse_args(argv)
    exit_code = namespace.func(namespace)
    assert exit_code == 0

    stdout = capsys.readouterr().out
    assert "## Diagnostics" in stdout
    assert "DIAG ONLY" in stdout


# ---------------------------------------------------------------------------
# Human adjudication honored in projection / report.
# ---------------------------------------------------------------------------
def _adjudicated(finding_key, observation_id, adjudication):
    """A human_adjudicated event (the envelope format_adjudication_command emits)."""
    return {
        "schema_version": 1,
        "event_type": "human_adjudicated",
        "finding_key": finding_key,
        "observation_id": observation_id,
        "judgment_source": "human",
        "payload": {"human_adjudication": adjudication, "note": None},
    }


def test_projection_exposes_human_adjudication():
    """The first projected finding exposes the human adjudication verdict."""
    history = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "confirmed"),
        _adjudicated("fk", "o1", "false-positive"),
    ]
    projected = nw.project_findings(history)
    first_finding = projected[0]
    assert first_finding["human_adjudication"] == "false-positive"


@pytest.mark.parametrize("adjudication", ["false-positive", "ignore-for-now"])
def test_report_suppresses_human_adjudicated_finding(adjudication):
    """A dismissing adjudication hides a confirmed finding from the default report.

    The same finding must still be present when diagnostics are included.
    """
    ledger_path = pathlib.Path("/tmp/l.jsonl")
    history = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="ADJUDICATED AWAY"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "confirmed"),
        _adjudicated("fk", "o1", adjudication),
    ]

    # Default view: the finding is gone and the empty-state line is shown.
    plain_report = nw.render_report(history, ledger_path)
    assert "ADJUDICATED AWAY" not in plain_report
    assert "No confirmed findings." in plain_report

    # Diagnostics view: the finding stays visible, so it is not silently lost.
    diagnostics_report = nw.render_report(
        history, ledger_path, include_diagnostics=True
    )
    assert "ADJUDICATED AWAY" in diagnostics_report


def test_report_keeps_confirmed_when_adjudication_is_agree():
    """An "agree" adjudication leaves the confirmed finding in the report."""
    history = [
        _observed_full("fk", "o1", "2026-06-01T00:00:00Z", title="STILL CONFIRMED"),
        _verified_full("fk", "o1", "2026-06-01T01:00:00Z", "confirmed"),
        _adjudicated("fk", "o1", "agree"),
    ]
    ledger_path = pathlib.Path("/tmp/l.jsonl")
    report = nw.render_report(history, ledger_path)
    assert "STILL CONFIRMED" in report


# ---------------------------------------------------------------------------
# Audit-file selection (newest RUN by timestamp, not subsystem name).
# ---------------------------------------------------------------------------
def test_latest_findings_json_picks_newest_timestamp_not_subsystem(tmp_path):
    """The file with the newest timestamp wins, not the alphabetically last name."""
    # "workspace" sorts after "core", yet the core run carries the later
    # timestamp, so picking by filename order alone would return the wrong file.
    stale_workspace_run = tmp_path / "findings-recoil-workspace-20260602-000000.json"
    fresh_core_run = tmp_path / "findings-recoil-core-20260604-120000.json"
    for findings_file in (stale_workspace_run, fresh_core_run):
        findings_file.write_text("{}", encoding="utf-8")

    selected = nw._latest_findings_json(tmp_path)
    assert selected == fresh_core_run


def test_latest_findings_json_none_when_empty(tmp_path):
    """With no files in the directory, the lookup returns None."""
    selected = nw._latest_findings_json(tmp_path)
    assert selected is None


# ---------------------------------------------------------------------------
# Hygiene ingest scoped to the `## Open` section.
# ---------------------------------------------------------------------------
def test_ingest_hygiene_queue_ignores_closed_sections(tmp_path):
    """Only the bullet under `## Open` is ingested; the `## Archived` one is not."""
    queue_lines = [
        "## Open",
        "- 2026-06-04 | live/thing.py:5 | open issue | fix later",
        "## Archived",
        "- 2026-05-01 | old/thing.py:9 | resolved issue | already fixed",
    ]
    queue_file = tmp_path / "hygiene-queue.md"
    # Every line, including the last, is newline-terminated.
    queue_file.write_text("\n".join(queue_lines) + "\n", encoding="utf-8")

    ingested = nw.ingest_hygiene_queue(queue_file, "run", nw.DEFAULT_REPO_ROOT)

    assert len(ingested) == 1
    sole_event = ingested[0]
    assert sole_event["payload"]["file"] == "live/thing.py"