"""consult_artifact_lint.py — path-citation linter (REC-48 v1).

A deterministic meta-gate (SYNTHESIS b#11) that catches fabricated/stale file
citations in design artifacts before a model reviews them. v1 verifies path
existence only. These tests use REAL fixtures: a temp markdown citing an
existing repo path (passes), a fabricated path (flagged MISSING + exit 1), a
`path:line` form, and a URL that must NOT be flagged.
"""

from pathlib import Path

from recoil.pipeline.tools.consult_artifact_lint import (
    classify,
    extract_candidates,
    lint,
    lint_file,
    main,
)

# A path that genuinely exists in the repo — used as the "REAL" fixture.
REAL_REL_PATH = "recoil/pipeline/tools/consult.py"
# A path that does not exist — used as the "MISSING" fixture.
FAKE_REL_PATH = "recoil/pipeline/tools/does_not_exist.py"
# Root handed to the linter in every test below. `parents[4]` is four levels
# above this file's own directory — presumably the repository root.
# NOTE(review): re-check the index if this test module is ever moved.
REPO_ROOT = Path(__file__).resolve().parents[4]


def _write(tmp_path, body):
    md = tmp_path / "artifact.md"
    md.write_text(body, encoding="utf-8")
    return md


def test_real_path_passes(tmp_path):
    """A backticked citation of an existing repo path counts as REAL, not MISSING."""
    artifact = _write(
        tmp_path, f"See the consult tool at `{REAL_REL_PATH}` for details.\n"
    )
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    assert report["real"] == 1
    first_citation = report["citations"][0]
    assert first_citation["status"] == "REAL"


def test_fabricated_path_flagged_missing(tmp_path):
    """A nonexistent cited path is reported MISSING with its token and line number."""
    artifact = _write(tmp_path, f"This cites `{FAKE_REL_PATH}` which is fake.\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 1
    flagged = report["missing_citations"][0]
    assert flagged["token"] == FAKE_REL_PATH
    assert flagged["line"] == 1


def test_fabricated_path_exit_1(tmp_path):
    """`main` returns 1 when the artifact cites a path that does not exist."""
    artifact = _write(tmp_path, f"`{FAKE_REL_PATH}`\n")
    argv = [str(artifact), "--repo-root", str(REPO_ROOT)]
    exit_code = main(argv)
    assert exit_code == 1


def test_real_path_exit_0(tmp_path):
    """`main` returns 0 when the only cited path exists."""
    artifact = _write(tmp_path, f"`{REAL_REL_PATH}`\n")
    argv = [str(artifact), "--repo-root", str(REPO_ROOT)]
    exit_code = main(argv)
    assert exit_code == 0


def test_path_line_suffix_stripped_before_existence_check(tmp_path):
    """A `path:NN` citation is REAL, and the reported token keeps the `:NN` suffix."""
    artifact = _write(tmp_path, f"Bug at `{REAL_REL_PATH}:42`.\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    citation = report["citations"][0]
    assert citation["token"] == f"{REAL_REL_PATH}:42"
    assert citation["status"] == "REAL"


def test_url_not_flagged(tmp_path):
    """URLs, bare or backticked, are never treated as path citations."""
    artifact = _write(
        tmp_path,
        "See https://arxiv.org/html/2603.17104v1 and "
        "`http://example.com/path/to/thing.html` for sources.\n",
    )
    report = lint_file(artifact, REPO_ROOT)
    # Nothing may be MISSING, and no collected token may carry a URL scheme.
    assert report["missing"] == 0
    cited_tokens = [entry["token"] for entry in report["citations"]]
    assert all("://" not in tok for tok in cited_tokens)


def test_bare_prose_path_detected(tmp_path):
    """A fabricated path written in plain prose (no backticks) is flagged MISSING."""
    artifact = _write(
        tmp_path, "The fix lives in recoil/pipeline/tools/nope_xyz.py today.\n"
    )
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 1
    flagged_token = report["missing_citations"][0]["token"]
    assert flagged_token == "recoil/pipeline/tools/nope_xyz.py"


def test_bare_word_not_a_candidate(tmp_path):
    """Backticked single words with no slash yield zero citation candidates."""
    artifact = _write(
        tmp_path, "Set effort to `medium` or `high`; the value is fine.\n"
    )
    report = lint_file(artifact, REPO_ROOT)
    assert report["total"] == 0


def test_absolute_path_resolved_as_is(tmp_path):
    """An absolute path to an existing file is classified REAL."""
    real_abs = str(REPO_ROOT.joinpath(REAL_REL_PATH))
    artifact = _write(tmp_path, f"Absolute: `{real_abs}`\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    first_citation = report["citations"][0]
    assert first_citation["status"] == "REAL"


def test_classify_real_and_missing():
    """`classify` puts REAL/MISSING in element 0 and ignores a `:line` suffix."""
    expectations = (
        (REAL_REL_PATH, "REAL"),
        (FAKE_REL_PATH, "MISSING"),
        # The line suffix is stripped before the existence check.
        (f"{REAL_REL_PATH}:99", "REAL"),
    )
    for token, expected_status in expectations:
        assert classify(token, REPO_ROOT)[0] == expected_status


def test_lint_multiple_files_aggregates_missing(tmp_path):
    """`lint` returns one result per file and a flag set when any file has a MISSING."""
    clean_artifact = _write(tmp_path, f"`{REAL_REL_PATH}`\n")

    # Second artifact lives in a subdirectory and cites the fabricated path.
    nested_dir = tmp_path / "sub"
    nested_dir.mkdir()
    dirty_artifact = nested_dir / "bad.md"
    dirty_artifact.write_text(f"`{FAKE_REL_PATH}`\n", encoding="utf-8")

    per_file, found_missing = lint([clean_artifact, dirty_artifact], REPO_ROOT)
    assert found_missing is True
    assert len(per_file) == 2


def test_extract_candidates_dedupes_same_token_same_line(tmp_path):
    """A token repeated on one line collapses to a single `(token, 1)` candidate."""
    doubled = f"`{REAL_REL_PATH}` and again `{REAL_REL_PATH}`"
    found = extract_candidates(doubled)
    assert found == [(REAL_REL_PATH, 1)]


def test_artifact_relative_path_is_real(tmp_path):
    """A path cited relative to the artifact's own folder is classified REAL.

    Consult folders cite `research/foo.md` relative to themselves, so the
    sibling file is created next to the artifact rather than under the repo
    root (which is expected NOT to contain `research/notes.md`).
    """
    research_dir = tmp_path / "research"
    research_dir.mkdir()
    (research_dir / "notes.md").write_text("x", encoding="utf-8")
    artifact = _write(tmp_path, "See `research/notes.md` for sources.\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    first_citation = report["citations"][0]
    assert first_citation["status"] == "REAL"


def test_slashed_notation_not_a_candidate(tmp_path):
    """Slash-separated notation that is not a path yields zero candidates.

    `max/ultracode` is effort notation and `-c/--config` / `-p/--profile` are
    CLI-flag pairs; none of them may be picked up as a citation.
    """
    text = "Use `max/ultracode` only with `-c/--config` and `-p/--profile`.\n"
    artifact = _write(tmp_path, text)
    report = lint_file(artifact, REPO_ROOT)
    assert report["total"] == 0


def test_no_false_missing_on_clean_synthesis_excerpt(tmp_path):
    """Mixed prose with a URL, a bare word and one real path yields exactly one REAL."""
    excerpt_lines = [
        "## Build order\n",
        "Wire the flag into the codex branch of `recoil/pipeline/tools/consult.py`.\n",
        "Research: https://arxiv.org/html/2603.24755v1 (reliability).\n",
        "Cap both engines at `high` without a frozen target.\n",
    ]
    artifact = _write(tmp_path, "".join(excerpt_lines))
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    assert report["real"] == 1


# --- Codex-review hardening: false-MISSING classes found on real repo specs ---


def test_env_and_template_paths_not_flagged(tmp_path):
    """Env-var, `${VAR:-x}`, `{placeholder}` and `~/` paths are never candidates.

    Such paths cannot resolve literally, so the linter must ignore them
    instead of reporting them MISSING.
    """
    templated_lines = [
        "Logs at `$RECOIL_ROOT/_dispatch_logs/receipts.jsonl`.\n",
        "Override via `${RECOIL_ROOT:-recoil}/_dispatch_logs/receipts.jsonl`.\n",
        "Bible at `projects/{project}/state/visual/global_bible.json`.\n",
        "Config in `~/Dropbox/CLAUDE_PROJECTS/config.json`.\n",
    ]
    artifact = _write(tmp_path, "".join(templated_lines))
    report = lint_file(artifact, REPO_ROOT)
    assert report["total"] == 0


def test_line_range_suffix_stripped(tmp_path):
    """A `path:NNN-MMM` range citation is REAL and is counted exactly once."""
    artifact = _write(tmp_path, f"Edit `{REAL_REL_PATH}:1572-1605` carefully.\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    first_citation = report["citations"][0]
    assert first_citation["status"] == "REAL"
    # The bare path nested inside the same span must not add a second entry.
    assert report["total"] == 1


def test_dot_relative_path_is_candidate(tmp_path):
    """A dot-prefixed path such as `.github/ci.yml` is a candidate and resolves REAL."""
    dot_dir = tmp_path / ".github"
    dot_dir.mkdir()
    (dot_dir / "ci.yml").write_text("x", encoding="utf-8")
    artifact = _write(tmp_path, "CI lives in `.github/ci.yml` today.\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 0
    first_citation = report["citations"][0]
    assert first_citation["status"] == "REAL"


def test_member_notation_not_flagged(tmp_path):
    """`getattr/dict.get` looks path-like but must produce no citation at all.

    It has no real parent directory, so it is expected to be skipped rather
    than reported MISSING.
    """
    artifact = _write(tmp_path, "Use `getattr/dict.get` for safe access.\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["total"] == 0


def test_diff_prefix_paths_not_flagged(tmp_path):
    """Pasted-diff `a/...` and `b/...` header paths produce no citations.

    There is no `a/` or `b/` parent directory, so these are expected to be
    skipped rather than reported MISSING.
    """
    diff_header = [
        "--- a/recoil/pipeline/tools/consult.py\n",
        "+++ b/recoil/pipeline/tools/consult.py\n",
    ]
    artifact = _write(tmp_path, "".join(diff_header))
    report = lint_file(artifact, REPO_ROOT)
    assert report["total"] == 0


def test_fabricated_in_real_dir_still_flagged(tmp_path):
    """A fabricated file inside a real directory is still reported MISSING.

    The parent-directory skip rule must not hide genuine stale or fabricated
    citations.
    """
    artifact = _write(tmp_path, "`recoil/pipeline/tools/totally_made_up_zzz.py`\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 1


def test_fabricated_deep_directory_still_flagged(tmp_path):
    """A real top-level dir followed by a fabricated deeper dir is MISSING (exit 1).

    Codex pass-2 finding: `recoil/` exists but `recoil/not_a_real_dir/` does
    not, and the citation must not be silently skipped.
    """
    artifact = _write(tmp_path, "`recoil/not_a_real_dir/fake.py`\n")
    report = lint_file(artifact, REPO_ROOT)
    assert report["missing"] == 1
    exit_code = main([str(artifact), "--repo-root", str(REPO_ROOT)])
    assert exit_code == 1


def test_env_var_path_inner_remnant_not_leaked(tmp_path):
    """An env-prefixed path must not leak its bare tail as a separate citation.

    Codex pass-2 finding: `$RECOIL_ROOT/recoil/pipeline/tools/no_such.py` must
    not leave `recoil/pipeline/tools/no_such.py` behind to be flagged MISSING.
    """
    artifact = _write(
        tmp_path, "Logs at `$RECOIL_ROOT/recoil/pipeline/tools/no_such.py`.\n"
    )
    report = lint_file(artifact, REPO_ROOT)
    assert report["total"] == 0


def test_env_strip_does_not_desync_later_inline_span(tmp_path):
    """Dropping an env token must not hide a later backticked span on the same line.

    Codex pass-3 finding: if stripping the env token also consumed its closing
    backtick, the following inline directory citation would be missed.
    """
    artifact = _write(
        tmp_path, "`$RECOIL_ROOT/nope.py` then `recoil/pipeline/tools/`\n"
    )
    report = lint_file(artifact, REPO_ROOT)
    # The env token is discarded; the directory citation must still be collected.
    cited_tokens = [entry["token"] for entry in report["citations"]]
    assert "recoil/pipeline/tools/" in cited_tokens
    assert report["missing"] == 0
