#!/usr/bin/env python3
"""Path count telemetry — counts canonical execution paths and reports deltas.

Scans the recoil/ codebase for functions matching accretion-sentinel patterns,
compares to last run, and outputs a MORNING_REVIEW-compatible summary.

Usage:
    python3 recoil/architecture/tools/path_count_telemetry.py
    python3 recoil/architecture/tools/path_count_telemetry.py --manifest recoil/architecture/ssot_manifest.yaml
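    python3 recoil/architecture/tools/path_count_telemetry.py --dry-run
    python3 recoil/architecture/tools/path_count_telemetry.py --output json
    python3 recoil/architecture/tools/path_count_telemetry.py --git-diff-check 5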

State file: recoil/architecture/.path_count_state.json, resolved relative to the
current working directory (auto-created unless --dry-run is given)
"""
from __future__ import annotations

import argparse
import ast
import fnmatch
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

# Repository root, derived from this file's resolved location: three directory
# levels above the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Directory tree that is scanned for sentinel functions and symbol references.
RECOIL_DIR = REPO_ROOT / "recoil"
# Where the counts from the previous run are persisted.
# NOTE(review): unlike REPO_ROOT this is a relative path, so it resolves against
# the current working directory — presumably the tool is meant to be launched
# from the repository root; confirm.
STATE_FILE = Path("recoil/architecture/.path_count_state.json")

# Patterns that indicate potential accretion (function names to count).
# Each entry is a regular expression applied with re.match to a function's
# name. The list order is also the key order of the per-pattern results dict.
SENTINEL_PATTERNS = [
    r"^execute_",
    r"^write_.*sidecar",
    r"^assemble_",
    r"^build_.*payload",
    r"^resolve_.*ref",
    r"^dispatch_",
]

# Exclusions consulted by _is_excluded():
# - EXCLUDED_DIRS: exact names compared against the components of a path.
# - EXCLUDED_FILES: fnmatch-style globs tested against the file name only.
EXCLUDED_DIRS = {"tests", "__pycache__", ".worktrees", "_archive"}
EXCLUDED_FILES = {"*mock*", "*fixture*", "*_test*", "test_*"}

# Matches an added line ("+" prefix) of unified-diff output that defines a
# sentinel-named function; group(1) is the function name.
# Indentation after the "+" is allowed so that newly added methods and nested
# functions are detected as well — the AST scan in this file counts functions
# at any nesting depth, and the drift check should see the same set.
MANIFEST_DRIFT_PATTERN = re.compile(
    r'^\+[ \t]*(?:async )?def (execute_\w+|write_\w*sidecar\w*|assemble_\w+|build_\w*payload\w*|resolve_\w*ref\w*|dispatch_\w+)\s*\('
)


def _check_git_diff(n_commits: int, manifest_path: Path) -> list[str]:
    """Check git diff for new sentinel functions not registered in manifest.

    Runs ``git diff HEAD~<n_commits> HEAD`` in the current working directory
    and inspects every line that matches ``MANIFEST_DRIFT_PATTERN``. A match
    is reported as drift unless its function name appears after ``::`` in a
    ``canonical`` or ``deprecated_paths`` entry of the manifest, or the
    definition line or one of the two following diff lines contains
    ``# [SSOT:``.

    Args:
        n_commits: How many commits back from HEAD the diff should start.
        manifest_path: Path to the YAML manifest. A missing, unreadable,
            empty or malformed manifest is treated as having no registered
            names (a warning is written to stderr when loading fails).

    Returns:
        One message per unregistered function, or a single warning message
        when ``git diff`` could not be executed.
    """
    import subprocess

    try:
        result = subprocess.run(
            ["git", "diff", f"HEAD~{n_commits}", "HEAD"],
            capture_output=True, text=True, check=True,
            # Diffs may contain bytes that are not valid UTF-8; never let
            # decoding abort the check.
            encoding="utf-8", errors="replace",
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a git executable that is missing or cannot be run.
        return ["WARNING: git diff failed — cannot check manifest drift"]
    diff_text = result.stdout

    manifest = None
    try:
        import yaml
        if manifest_path.exists():
            manifest = yaml.safe_load(manifest_path.read_text())
    except Exception as exc:
        # Best effort: continue with an empty manifest, but say so, because
        # every sentinel function will then be reported as drift.
        print(f"WARNING: could not load manifest {manifest_path}: {exc}", file=sys.stderr)
    if not isinstance(manifest, dict):
        # safe_load returns None for an empty document and may return a
        # non-mapping for malformed manifests.
        manifest = {}

    # Build set of all known function names from manifest
    known_names: set[str] = set()
    capabilities = manifest.get("capabilities") or {}
    if isinstance(capabilities, dict):
        for cap_data in capabilities.values():
            if not isinstance(cap_data, dict):
                continue
            candidates = [cap_data.get("canonical"), *(cap_data.get("deprecated_paths") or [])]
            for path_str in candidates:
                if isinstance(path_str, str) and "::" in path_str:
                    known_names.add(path_str.split("::", 1)[1])

    drift = []
    lines = diff_text.splitlines()
    for i, line in enumerate(lines):
        m = MANIFEST_DRIFT_PATTERN.match(line)
        if not m:
            continue
        func_name = m.group(1)
        if func_name in known_names:
            continue
        # Look for a [SSOT: ...] tag on the definition line or on either of
        # the two diff lines that follow it.
        if any("# [SSOT:" in candidate for candidate in lines[i:i + 3]):
            continue
        drift.append(
            f"[MANIFEST DRIFT]: new function `{func_name}` not in manifest "
            f"and not tagged with # [SSOT: <canonical>]"
        )

    return drift


def _is_excluded(path: Path) -> bool:
    """Return True when *path* should be left out of scans.

    A path is excluded when one of its components is named in
    ``EXCLUDED_DIRS`` or when its file name matches one of the
    ``EXCLUDED_FILES`` globs.

    Directory names are checked on the part of the path below ``REPO_ROOT``
    whenever the path lies inside the repository. Otherwise a checkout that
    itself sits beneath an excluded name (for example a worktree under
    ``.worktrees/``) would cause every file to be excluded.
    """
    try:
        scoped = path.relative_to(REPO_ROOT)
    except ValueError:
        # Relative path or outside the repository: check it as given.
        scoped = path
    if not EXCLUDED_DIRS.isdisjoint(scoped.parts):
        return True
    return any(fnmatch.fnmatch(path.name, pat) for pat in EXCLUDED_FILES)


def _count_sentinel_functions(scan_dir: Path) -> dict[str, list[str]]:
    """Returns {pattern: [file::func, ...]} for all matching functions.

    Every non-excluded ``*.py`` file below *scan_dir* is parsed and each
    function or async function definition, at any nesting depth, is tested
    against every entry of ``SENTINEL_PATTERNS``. A function whose name
    matches several patterns is listed under each of them.

    Files that cannot be read, decoded as UTF-8 or parsed are skipped. Files
    are visited in sorted order so the result lists are deterministic.

    Args:
        scan_dir: Root directory to scan recursively.

    Returns:
        A dict keyed by pattern string (in ``SENTINEL_PATTERNS`` order) whose
        values are ``"<path>::<function name>"`` strings. The path is relative
        to ``REPO_ROOT`` when the file lies inside it, otherwise it is the
        path as found.
    """
    compiled = [(pat, re.compile(pat)) for pat in SENTINEL_PATTERNS]
    results: dict[str, list[str]] = {pat: [] for pat in SENTINEL_PATTERNS}

    for py_file in sorted(scan_dir.rglob("*.py")):
        if _is_excluded(py_file):
            continue
        try:
            source = py_file.read_text(encoding="utf-8")
            tree = ast.parse(source, filename=str(py_file))
        except (SyntaxError, ValueError, OSError):
            # ValueError covers UnicodeDecodeError from read_text and the
            # "null bytes" error ast.parse raises on some Python versions.
            continue

        try:
            rel = str(py_file.relative_to(REPO_ROOT))
        except ValueError:
            # scan_dir is outside the repository; report the path unchanged.
            rel = str(py_file)

        for node in ast.walk(tree):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            for pat, regex in compiled:
                if regex.match(node.name):
                    results[pat].append(f"{rel}::{node.name}")

    return results


def _read_production_sources() -> list[str]:
    """Return the text of every non-excluded ``*.py`` file under RECOIL_DIR."""
    texts = []
    for py_file in RECOIL_DIR.rglob("*.py"):
        if _is_excluded(py_file):
            continue
        try:
            # errors="replace" keeps a stray non-UTF-8 byte from hiding a file
            # from the symbol search.
            texts.append(py_file.read_text(encoding="utf-8", errors="replace"))
        except OSError:
            continue
    return texts


def _find_orphaned_deprecated(manifest_path: Path) -> list[str]:
    """Find deprecated_paths entries that have zero callers in production code.

    For every capability in the manifest whose ``state`` is not
    ``"tombstoned"``, each ``deprecated_paths`` entry of the form
    ``<file>::<symbol>`` is examined. Entries containing a space or ``(`` are
    treated as narrative notes and skipped.

    An entry is reported when its file does not exist under ``REPO_ROOT``, or
    when the symbol text occurs in at most one non-excluded ``*.py`` file
    under ``RECOIL_DIR``. This is a rough substring search: the defining file
    counts as one occurrence, and longer names containing the symbol also
    count.

    Args:
        manifest_path: Path to the YAML manifest.

    Returns:
        Human-readable descriptions of the orphaned entries; an empty list
        when the manifest cannot be loaded or has no usable capabilities.
    """
    try:
        import yaml
        manifest = yaml.safe_load(manifest_path.read_text())
    except Exception:
        return []
    # safe_load returns None for an empty document.
    if not isinstance(manifest, dict):
        return []
    capabilities = manifest.get("capabilities") or {}
    if not isinstance(capabilities, dict):
        return []

    orphaned = []
    # Source texts are read lazily and only once, not once per entry.
    sources = None

    for cap_name, cap_data in capabilities.items():
        if not isinstance(cap_data, dict):
            continue
        if cap_data.get("state") == "tombstoned":
            continue
        for dep_path in cap_data.get("deprecated_paths") or []:
            if not isinstance(dep_path, str):
                continue
            # Skip narrative notes
            if " " in dep_path or "(" in dep_path:
                continue
            if "::" not in dep_path:
                continue
            file_rel, symbol = dep_path.split("::", 1)
            if not (REPO_ROOT / file_rel).exists():
                orphaned.append(f"{cap_name}/{dep_path} (file not found — eligible for manifest cleanup)")
                continue
            # Count files mentioning the symbol (rough grep)
            if sources is None:
                sources = _read_production_sources()
            caller_count = sum(1 for text in sources if symbol in text)
            if caller_count <= 1:  # only self-reference
                orphaned.append(f"{cap_name}: {dep_path} ({caller_count} callers — eligible for /converge deletion)")

    return orphaned


def _load_state() -> dict:
    """Load the state saved by the previous run from ``STATE_FILE``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object at the
        top level (callers use ``.get`` on the result).
    """
    try:
        state = json.loads(STATE_FILE.read_text())
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid JSON
        # (JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        return {}
    return state if isinstance(state, dict) else {}


def _save_state(counts: dict[str, int]) -> None:
    """Write *counts* and the current UTC timestamp to ``STATE_FILE`` as JSON.

    The parent directory of ``STATE_FILE`` is created first if it is missing.
    """
    STATE_FILE.parent.mkdir(parents=True, exist_ok=True)
    stamped_at = datetime.now(timezone.utc)
    payload = json.dumps(
        {"timestamp": stamped_at.isoformat(), "counts": counts},
        indent=2,
    )
    STATE_FILE.write_text(payload)


def main() -> int:
    """Command-line entry point.

    With ``--git-diff-check N`` only the manifest drift check over the last N
    commits is run and printed. Otherwise the sentinel functions under
    ``RECOIL_DIR`` are counted, compared with the totals stored by the
    previous run, and reported as text or JSON together with any orphaned
    deprecated paths from the manifest. Unless ``--dry-run`` is given, the new
    counts are then saved as the state for the next run.

    Returns:
        Always 0.
    """
    p = argparse.ArgumentParser(description="Path count telemetry")
    p.add_argument("--manifest", default="recoil/architecture/ssot_manifest.yaml")
    p.add_argument("--dry-run", action="store_true", help="Print output without saving state")
    p.add_argument("--output", choices=["text", "json"], default="text")
    p.add_argument("--git-diff-check", type=int, metavar="N",
                   help="Check last N commits for unregistered new sentinel functions")
    args = p.parse_args()

    # Compare against None so that an explicit ``--git-diff-check 0`` still
    # selects drift mode instead of silently running (and saving) a full
    # telemetry pass.
    if args.git_diff_check is not None:
        drift_issues = _check_git_diff(args.git_diff_check, Path(args.manifest))
        if drift_issues:
            print("## Manifest Drift Detected:")
            for issue in drift_issues:
                print(f"  {issue}")
        else:
            print("## Manifest Drift: none detected")
        return 0

    results = _count_sentinel_functions(RECOIL_DIR)
    current_counts = {pat: len(matches) for pat, matches in results.items()}
    total_current = sum(current_counts.values())

    # Tolerate a hand-edited or corrupted state file: anything that is not a
    # mapping of integer counts is treated as "no previous run".
    prev_state = _load_state()
    prev_counts = prev_state.get("counts") if isinstance(prev_state, dict) else None
    if not isinstance(prev_counts, dict):
        prev_counts = {}
    prev_total = sum(n for n in prev_counts.values() if isinstance(n, int))
    delta = total_current - prev_total if prev_counts else 0

    # Find orphaned deprecated paths
    manifest_path = Path(args.manifest)
    orphaned = _find_orphaned_deprecated(manifest_path) if manifest_path.exists() else []

    if args.output == "json":
        out = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "total_sentinel_functions": total_current,
            "delta_from_last_run": delta,
            "by_pattern": current_counts,
            "orphaned_deprecated": orphaned,
        }
        print(json.dumps(out, indent=2))
    else:
        delta_str = f"+{delta}" if delta > 0 else str(delta)
        print(f"\n## Net Path Count Change: {delta_str}")
        print(f"Total sentinel functions: {total_current} (was {prev_total if prev_counts else 'unknown'})")
        print()
        for pat, matches in results.items():
            if matches:
                # Turn the regex into a shorter glob-like label for display.
                pat_label = pat.replace("^", "").replace(".*", "*").replace("_$", "_")
                print(f"  {pat_label}: {len(matches)}")
        if orphaned:
            print()
            print("## Orphaned deprecated paths (eligible for /converge deletion):")
            for o in orphaned:
                print(f"  - {o}")
        print()

    if not args.dry_run:
        _save_state(current_counts)

    return 0


if __name__ == "__main__":
    sys.exit(main())
