# recoil/pipeline/tests/test_ref_resolver_monopoly.py
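"""Property test: ref_resolver is the ONLY module that enumerates files
under output/refs/. Any other pipeline file that calls os.listdir /
glob.glob / Path.glob with an argument mentioning _canonical or output/refs,
or calls iterdir / rglob on a variable named like a canonical asset
directory (refs_dir, char_dir, loc_dir, ...), fails the build. The same scan
also rejects hardcoded refs/ and assets/ path strings and raw state/visual
or output/video path construction that bypasses ProjectPaths.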

This test prevents the class of bug behind the April 6 9-round debug
convergence: three separate codepaths drifting on filesystem state.

Runs in pytest, pre-commit, and CI.
"""

import re
from pathlib import Path


# Anchor both roots on this file's resolved location: parents[0] is the
# directory holding this file, parents[1] is the directory above it, and the
# recoil root is one level above that.
_THIS_FILE = Path(__file__).resolve()
PIPELINE_ROOT = _THIS_FILE.parents[1]
RECOIL_ROOT = PIPELINE_ROOT.parent

# ── Tiered allowlists ────────────────────────────────────────────
# Temporary exemptions, one set per migration tier. All four are empty.
_TIER_1_ALLOWLIST: set[str] = set()  # Tier 1 (removed after Phase 9-14)
_TIER_2_ALLOWLIST: set[str] = set()  # Tier 2 (removed after Phase 15-17); cleared in Phase 27
_TIER_3_ALLOWLIST: set[str] = set()  # Tier 3 (removed after Phase 18-21)
_TIER_4_ALLOWLIST: set[str] = set()  # Tier 4 (removed after Phase 23-26)

# ── Permanent allowlist (migration tools, tests, this file) ──────
# Entries are forward-slash paths compared against a file's path relative to
# PIPELINE_ROOT.
_PERMANENT_ALLOWLIST: set[str] = {
    # tests (including this file)
    "tests/test_ref_resolver_monopoly.py",
    "tests/test_ref_resolver.py",
    # canonical-ref tooling
    "tools/migration/starsend_to_engine.py",
    "tools/populate_canonical.py",
    "tools/validate_canonical_refs.py",
    # shared library modules
    "_lib/fs_watcher/watcher.py",
    "_lib/manifest_writer.py",
    "_lib/taxonomy.py",
    # deprecated surface, Console v2 cutover imminent
    "editors/review_server.py",
    # migration tools — source-side historical paths intentional
    "tools/migrate_heroes.py",
    "tools/migrate_assets.py",
    "tools/migrate_refs.py",
    # file-serving route — legitimate refs_dir iteration
    "api/routes/files.py",
    # CLI tool — legitimate loc_dir iteration for ref resolution
    "tools/dispatch_cli.py",
}

# Effective allowlist: the union of every tier plus the permanent entries.
ALLOWLIST: set[str] = set().union(
    _TIER_1_ALLOWLIST,
    _TIER_2_ALLOWLIST,
    _TIER_3_ALLOWLIST,
    _TIER_4_ALLOWLIST,
    _PERMANENT_ALLOWLIST,
)

# Regexes applied to each non-comment source line; a single match on a line
# is enough to report it.
FORBIDDEN_PATTERNS: list[re.Pattern[str]] = [
    re.compile(source)
    for source in (
        # ── Path-qualified raw enumeration ────────────────────────────────
        # Direct string match: the call's argument text mentions a canonical
        # path. The bare `.glob(` form also matches inside `glob.glob(`.
        r"os\.listdir\([^)]*(_canonical|output/refs)",
        r"glob\.glob\([^)]*(_canonical|output/refs)",
        r"\.glob\([^)]*(_canonical|output/refs)",
        # iterdir/rglob on variables whose name suggests canonical asset filesystem.
        # Legitimate non-asset iterations (projects_root, episode_dir, exploration_dir,
        # video_dir, candidates_dir, previs_path, etc.) are intentionally NOT matched.
        # The lookbehind rejects a preceding "." as well as a word character, so
        # attribute access such as `self.refs_dir.iterdir()` is not matched.
        (
            r"(?<![\w.])"
            r"(\w*(?:_canonical|refs_root|refs_dir|ref_dir|canonical_dir|"
            r"char_dir|loc_dir|prop_dir|cast_dir|char_refs|asset_ref_dir|"
            r"asset_dir|picks_dir|keystones_dir|expressions_dir)\w*)"
            r"\s*\.(?:iterdir|rglob)\("
        ),
        # ── v2/v3 hardcoded asset path strings ────────────────────────────
        # Any non-paths.py, non-_archive/ file that constructs asset paths
        # directly instead of using ProjectPaths.resolve_ref().
        r'"refs/characters/',
        r'"refs/locations/',
        r'"refs/props/',
        r"'refs/characters/",
        r"'refs/locations/",
        r"'refs/props/",
        r'f"assets/identity/',
        r"f'assets/identity/",
        r'f"assets/char/',
        r"f'assets/char/",
        r'f"assets/loc/',
        r"f'assets/loc/",
        r'f"assets/prop/',
        r"f'assets/prop/",
        # Direct Path construction outside paths.py
        r'Path\([^)]*\)\s*/\s*"assets"\s*/',
        # ── Raw path construction bypassing ProjectPaths ─────────
        # String-concatenated paths (e.g. "state/visual/plans" instead of
        # ProjectPaths.plans_dir)
        r'"state/visual/',
        r"'state/visual/",
        r'"output/video/',
        r"'output/video/",
        # os.path.join with raw state/visual or output/video segments
        r'os\.path\.join\([^)]*"state"\s*,\s*"visual"',
        r"os\.path\.join\([^)]*'state'\s*,\s*'visual'",
        r'os\.path\.join\([^)]*"output"\s*,\s*"video"',
        r"os\.path\.join\([^)]*'output'\s*,\s*'video'",
    )
]


def _file_in_allowlist(path: Path) -> bool:
    """Return True when *path* is exempt from the monopoly scan.

    The path is rewritten relative to PIPELINE_ROOT with forward slashes
    before the membership test, which is the form ALLOWLIST entries use.
    ``Path.relative_to`` raises ValueError if *path* is not located under
    PIPELINE_ROOT.
    """
    return path.relative_to(PIPELINE_ROOT).as_posix() in ALLOWLIST


def _is_python_file(path: Path) -> bool:
    """Return True for ``.py`` files that should be scanned.

    A path is rejected when its suffix is not ``.py`` or when any of its
    components is exactly ``__pycache__`` or ``_archive``.
    """
    if path.suffix != ".py":
        return False
    skipped_dirs = {"__pycache__", "_archive"}
    # Exact component match only: "_archive_v2" does not count as "_archive".
    return skipped_dirs.isdisjoint(path.parts)


def test_ref_resolver_is_only_enumeration_path():
    """Fail when a non-allowlisted pipeline file matches a forbidden pattern.

    Every ``*.py`` file under PIPELINE_ROOT that passes ``_is_python_file``
    and is not in the allowlist is scanned line by line. Lines whose stripped
    text starts with ``#`` are ignored; any other line matching one of
    FORBIDDEN_PATTERNS is recorded as ``<relative path>:<line>: <text>``.
    The test asserts that no such line was found.
    """
    violations: list[str] = []
    # Sorted so the failure report is in a stable order on every filesystem.
    for py_file in sorted(PIPELINE_ROOT.rglob("*.py")):
        if not _is_python_file(py_file) or _file_in_allowlist(py_file):
            continue
        try:
            # Decode explicitly as UTF-8 (Python's default source encoding)
            # rather than the locale encoding, and replace undecodable bytes
            # instead of raising. Previously a decode failure skipped the whole
            # file silently, so violations in it were never reported. The
            # forbidden patterns are pure ASCII, so replacement characters
            # cannot hide a match.
            text = py_file.read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            # The file disappeared between discovery and read (or is a broken
            # symlink); there is nothing to scan.
            continue
        rel = py_file.relative_to(PIPELINE_ROOT)
        for line_num, line in enumerate(text.splitlines(), start=1):
            stripped = line.strip()
            if stripped.startswith("#"):
                continue
            # One report per line, no matter how many patterns match it.
            if any(pattern.search(line) for pattern in FORBIDDEN_PATTERNS):
                violations.append(f"{rel}:{line_num}: {stripped}")

    assert not violations, (
        "ref_resolver monopoly violation. The following files enumerate "
        "the canonical filesystem directly. Either route through "
        "recoil.core.ref_resolver, or add the file to ALLOWLIST in "
        "test_ref_resolver_monopoly.py if it has a legitimate reason "
        "(migration, the resolver itself, or a test):\n\n" + "\n".join(violations)
    )


# state/visual sub-directories whose raw construction counts as a regression.
_MIGRATED_STATE_SUBDIRS = r"(shots|execution|passes|global_bible|casting_state)"

# Raw path spellings that the migrated files must not contain, in both
# double- and single-quoted form.
_MIGRATED_RAW_PATTERNS = [
    re.compile(source)
    for source in (
        # Segment-by-segment joins: "state" / "visual" / "<subdir>"
        r'"state"\s*/\s*"visual"\s*/\s*"' + _MIGRATED_STATE_SUBDIRS + r'"',
        r"'state'\s*/\s*'visual'\s*/\s*'" + _MIGRATED_STATE_SUBDIRS + r"'",
        # Single string literals starting with state/visual/<subdir>
        r'"state/visual/' + _MIGRATED_STATE_SUBDIRS,
        r"'state/visual/" + _MIGRATED_STATE_SUBDIRS,
        # Segment-by-segment joins: "output" / "video"
        r'"output"\s*/\s*"video"',
        r"'output'\s*/\s*'video'",
    )
]

# Files checked by the ProjectPaths migration guard: each must exist and must
# not contain any of the raw path spellings in _MIGRATED_RAW_PATTERNS.
_MIGRATED_FILES: list[Path] = [
    RECOIL_ROOT.joinpath("execution", "execution_store.py"),
    RECOIL_ROOT.joinpath("execution", "pass_store.py"),
    RECOIL_ROOT.joinpath("core", "project.py"),
    PIPELINE_ROOT.joinpath("tools", "generate_keyframes.py"),
    PIPELINE_ROOT.joinpath("tools", "reclaim_orphans.py"),
    RECOIL_ROOT.joinpath("tools", "reconcile_take_paths.py"),
    RECOIL_ROOT.joinpath("workspace", "verdict.py"),
]


def test_projectpaths_migration_guard():
    """Files migrated to ProjectPaths in phases 2-5 must not regress
    to raw state/visual or output/video path construction.

    First asserts that every entry of _MIGRATED_FILES still exists, so a
    rename cannot silently drop a file from the guard. Then scans each file
    line by line, ignoring lines whose stripped text starts with ``#``, and
    records any line matching one of _MIGRATED_RAW_PATTERNS as
    ``<path relative to RECOIL_ROOT>:<line>: <text>``.
    """
    missing = [f for f in _MIGRATED_FILES if not f.exists()]
    assert not missing, (
        "Migrated files no longer exist — update _MIGRATED_FILES if renamed:\n"
        + "\n".join(str(f.relative_to(RECOIL_ROOT)) for f in missing)
    )
    violations: list[str] = []
    for py_file in _MIGRATED_FILES:
        try:
            # Decode explicitly as UTF-8 (Python's default source encoding)
            # rather than the locale encoding, and replace undecodable bytes
            # instead of raising. Previously a decode failure skipped the file
            # silently, which made the guard vacuous for that file. The
            # patterns are pure ASCII, so replacement characters cannot hide a
            # match.
            text = py_file.read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            # Only reachable if the file vanished after the existence check.
            continue
        rel = py_file.relative_to(RECOIL_ROOT)
        for line_num, line in enumerate(text.splitlines(), start=1):
            stripped = line.strip()
            if stripped.startswith("#"):
                continue
            # One report per line, no matter how many patterns match it.
            if any(pattern.search(line) for pattern in _MIGRATED_RAW_PATTERNS):
                violations.append(f"{rel}:{line_num}: {stripped}")

    assert not violations, (
        "ProjectPaths migration regression. These files were migrated to "
        "use ProjectPaths but have regressed to raw path construction. "
        "Use ProjectPaths.for_project(slug).visual_state_dir / ... or "
        ".renders_dir instead:\n\n" + "\n".join(violations)
    )
