#!/usr/bin/env python3
"""One-shot F5 slug merge cleanup.

This migration removes the drifted ``assets/loc/corridor/`` fixture twin and
the dead ``assets/loc/loc.json`` v3-migration index. It intentionally does not
touch the canonical ``assets/loc/int_lower_decks_corridor/`` location, the
bible, or quarantined historical batch files.

Idempotent: once both cleanup targets are gone, repeated runs are clean no-ops.
Use ``--dry-run`` to print the deletion plan without writing anything.
"""

from __future__ import annotations

import argparse
import json
import os
import shutil
import stat
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Allow `import recoil...` without an editable install (repo root = 4 up).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent))

from recoil.core.paths import ProjectPaths


# Slug of the drifted duplicate directory under assets/loc/ that this
# migration deletes.
DRIFTED_LOCATION = "corridor"
# Slug of the canonical location. It is only reported in the plan; nothing in
# this script modifies it.
CANONICAL_LOCATION = "int_lower_decks_corridor"
# Regular files at or above this size (5 KiB) under the drifted directory are
# treated as possible real media and make the migration abort.
REAL_MEDIA_THRESHOLD_BYTES = 5 * 1024
# Recorded in the plan and used as the receipt filename prefix.
MIGRATION_NAME = "f5_slug_merge"
# Human-readable rationale recorded in the plan/receipt.
# NOTE(review): nothing in this script touches a SHAFT_PROBE key - confirm the
# last clause still describes work done elsewhere.
WHY = (
    "F5: corridor/ duplicate of int_lower_decks_corridor; "
    "loc.json dead v3-migration index; SHAFT_PROBE stale key"
)
# Pointer to the build-spec section, recorded in the plan/receipt.
SPEC_POINTER = (
    "consultations/recoil/breakdown-input-layer-2026-06-11/"
    "BUILD_SPEC_breakdown-c1c2.md#phase-6-f5-slug-merge"
)
# Recorded under "quarantine_policy" in the plan; this script never reads or
# writes the quarantined batch files it mentions.
QUARANTINE_NOTE = (
    "The 6 quarantined 2026-06-01 batch files with location_id='corridor' "
    "stay untouched; they are dead quarantine history with no consumer."
)
# Recorded under "scope_boundary" in the plan to document what is out of scope.
CANONICAL_STEM_DRIFT_NOTE = (
    "The canonical int_lower_decks_corridor directory is untouched. Its "
    "internal hyphen/underscore stem drift is out of scope for this migration."
)


def _utc_now() -> datetime:
    """Return the current moment as a timezone-aware UTC ``datetime``."""
    utc = timezone.utc
    return datetime.now(tz=utc)


def _rel(path: Path, root: Path) -> str:
    """Render *path* as a POSIX-style string, relative to *root* when possible.

    When *path* does not sit under *root*, the POSIX form of *path* itself
    is returned unchanged.
    """
    try:
        shown = path.relative_to(root)
    except ValueError:
        # Not below root: report the path as given.
        shown = path
    return shown.as_posix()


def _inventory_path(path: Path, project_root: Path) -> dict[str, Any]:
    """Describe a single filesystem entry without following a final symlink.

    The entry is inspected with ``lstat``, so for a symlink the reported
    ``size`` and ``kind`` describe the link itself, not its target.

    Returns:
        A dict with ``path`` (relative to *project_root* when possible),
        ``size`` and ``kind`` (``"symlink"``, ``"directory"``, ``"file"`` or
        ``"other"``). Symlinks additionally carry ``target`` (the raw link
        text) and ``target_exists`` (whether the link resolves).
    """
    info = path.lstat()
    file_mode = info.st_mode
    entry: dict[str, Any] = {"path": _rel(path, project_root), "size": info.st_size}

    if stat.S_ISLNK(file_mode):
        entry.update(
            kind="symlink",
            target=os.readlink(path),
            # exists() follows the link, so this is False for a dangling link.
            target_exists=path.exists(),
        )
        return entry

    if stat.S_ISDIR(file_mode):
        kind = "directory"
    elif stat.S_ISREG(file_mode):
        kind = "file"
    else:
        kind = "other"
    entry["kind"] = kind
    return entry


def _inventory_tree(root: Path, project_root: Path) -> list[dict[str, Any]]:
    """Inventory *root* and, when it is a real directory, everything below it.

    Returns an empty list when *root* is absent (a dangling symlink still
    counts as present). A symlinked *root* is listed on its own and never
    descended into. Descendants are ordered by their POSIX path string.
    """
    is_link = root.is_symlink()
    if not (is_link or root.exists()):
        return []

    entries = [_inventory_path(root, project_root)]
    if is_link or not root.is_dir():
        return entries

    descendants = sorted(root.rglob("*"), key=Path.as_posix)
    entries.extend(_inventory_path(node, project_root) for node in descendants)
    return entries


def _large_regular_files(root: Path, project_root: Path) -> list[dict[str, Any]]:
    """List regular files at or under *root* that look like real media.

    A file is an offender when it is a non-symlink regular file whose size is
    at least ``REAL_MEDIA_THRESHOLD_BYTES``. Symlinks are skipped: removing a
    link does not remove the data it points to.

    Args:
        root: Deletion target to scan. May be a directory or a single file.
        project_root: Base used to render offender paths relatively.

    Returns:
        ``{"path", "size"}`` dicts, *root* first (if it offends) followed by
        descendants ordered by POSIX path. Empty when *root* is missing or is
        itself a symlink.
    """
    if root.is_symlink() or not root.exists():
        return []

    # Check root itself as well as its descendants: when the target is a
    # plain file rather than a directory, rglob yields nothing, and the file
    # would otherwise slip past this guard and still be unlinked later.
    candidates = [root, *sorted(root.rglob("*"), key=lambda p: p.as_posix())]

    offenders: list[dict[str, Any]] = []
    for candidate in candidates:
        if candidate.is_symlink() or not candidate.is_file():
            continue
        size = candidate.stat().st_size
        if size >= REAL_MEDIA_THRESHOLD_BYTES:
            offenders.append({"path": _rel(candidate, project_root), "size": size})
    return offenders


def _single_file_inventory(path: Path, project_root: Path) -> list[dict[str, Any]]:
    """Return a one-item inventory for *path*, or ``[]`` when it is absent.

    A dangling symlink counts as present and is inventoried.
    """
    present = path.exists() or path.is_symlink()
    return [_inventory_path(path, project_root)] if present else []


def _build_plan(project: str, paths: ProjectPaths) -> dict[str, Any]:
    """Assemble the read-only deletion plan for *project*.

    Nothing is modified here. The plan lists which of the two cleanup targets
    currently exist, a full inventory of each, the result of the large-file
    safety scan over the drifted directory, and the static scope notes.

    Args:
        project: Project identifier, echoed into the plan.
        paths: Path resolver for the project.

    Returns:
        A JSON-serializable dict; ``delete_targets`` is empty when both
        targets are already gone.
    """
    project_root = paths.project_root
    corridor_dir = paths.asset_subject_dir("loc", DRIFTED_LOCATION)
    loc_index = paths.asset_class_dir("loc") / "loc.json"
    canonical_dir = paths.asset_subject_dir("loc", CANONICAL_LOCATION)

    corridor_inventory = _inventory_tree(corridor_dir, project_root)
    loc_index_inventory = _single_file_inventory(loc_index, project_root)

    # A target is scheduled for deletion only if its inventory is non-empty,
    # i.e. it currently exists on disk.
    inventoried = ((corridor_dir, corridor_inventory), (loc_index, loc_index_inventory))
    delete_targets = [
        _rel(target, project_root) for target, found in inventoried if found
    ]

    deleted_inventory = {
        "corridor_dir": corridor_inventory,
        "loc_index": loc_index_inventory,
    }
    safety = {
        "real_media_threshold_bytes": REAL_MEDIA_THRESHOLD_BYTES,
        "large_regular_files": _large_regular_files(corridor_dir, project_root),
    }
    scope_boundary = {
        "canonical_location": _rel(canonical_dir, project_root),
        "canonical_location_untouched": True,
        "canonical_stem_drift_note": CANONICAL_STEM_DRIFT_NOTE,
        "bible_untouched": True,
    }
    quarantine_policy = {
        "status": "untouched",
        "note": QUARANTINE_NOTE,
    }

    return {
        "schema_version": 1,
        "migration": MIGRATION_NAME,
        "project": project,
        "why": WHY,
        "spec": SPEC_POINTER,
        "delete_targets": delete_targets,
        "deleted_inventory": deleted_inventory,
        "safety": safety,
        "scope_boundary": scope_boundary,
        "quarantine_policy": quarantine_policy,
    }


def _write_json_atomic(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as pretty-printed JSON via a temp-file swap.

    The text is written to a hidden sibling ``.<name>.tmp`` and then moved
    over *path* with ``os.replace``, so readers never see a half-written
    file. Parent directories are created as needed. An existing file at
    *path* is overwritten.

    Args:
        path: Final destination of the JSON document.
        payload: JSON-serializable mapping; keys are emitted sorted.

    Raises:
        TypeError: If *payload* is not JSON-serializable (raised before any
            file is written).
        OSError: If writing or replacing fails; the temp file is removed
            before the error propagates.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize up front so a bad payload fails before touching the disk.
    text = json.dumps(payload, indent=2, sort_keys=True) + "\n"
    tmp = path.with_name(f".{path.name}.tmp")
    try:
        # Explicit encoding: do not depend on the process locale.
        tmp.write_text(text, encoding="utf-8")
        os.replace(tmp, path)
    except BaseException:
        # Best-effort cleanup of the leftover temp file; never let a cleanup
        # failure mask the original error.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise


def _receipt_path(paths: ProjectPaths, now: datetime) -> Path:
    """Return where the receipt for a run stamped *now* is stored.

    The filename is ``<MIGRATION_NAME>_<YYYY-MM-DD>.json`` inside the
    project's migration-history directory. Only the date of *now* is used, so
    two runs stamped with the same date resolve to the same path.
    """
    filename = f"{MIGRATION_NAME}_{now:%Y-%m-%d}.json"
    return paths.history_migration_dir / filename


def _remove_path(path: Path) -> None:
    """Delete *path*, whatever it is; do nothing when it does not exist.

    Symlinks (including links to directories and dangling links) and regular
    files are unlinked; real directories are removed recursively. Any other
    kind of entry is left alone.
    """
    # Test for a link first so a symlinked directory is unlinked rather than
    # having its target's contents removed.
    if path.is_symlink():
        path.unlink()
        return
    if path.is_file():
        path.unlink()
        return
    if path.is_dir():
        shutil.rmtree(path)


def migrate(project: str, *, dry_run: bool = False, now: datetime | None = None) -> int:
    """Run (or preview) the F5 slug merge cleanup for *project*.

    The plan is built first. If the safety scan found large regular files
    under the drifted directory, the run aborts without changing anything.
    Otherwise a dry run prints the plan, an already-clean project is a no-op,
    and a real run deletes the planned targets and writes a JSON receipt.

    The receipt is written even when a deletion fails partway, so the
    inventory captured before deletion is not lost; its ``deleted_targets``
    lists only the targets that were actually removed.

    Args:
        project: Project identifier passed to ``ProjectPaths.for_project``.
        dry_run: When true, print the plan as JSON and change nothing.
        now: Timestamp for the receipt; defaults to the current UTC time.

    Returns:
        ``0`` on success, dry run, or no-op; ``1`` when the safety scan
        aborts the run.

    Raises:
        OSError: If a deletion or the receipt write fails.
    """
    paths = ProjectPaths.for_project(project)
    plan = _build_plan(project, paths)

    large_files = plan["safety"]["large_regular_files"]
    if large_files:
        print(
            "[migrate_f5_slug_merge] ABORT: real media candidate(s) found "
            f"under assets/loc/{DRIFTED_LOCATION}/",
            file=sys.stderr,
        )
        print(json.dumps(large_files, indent=2), file=sys.stderr)
        return 1

    if dry_run:
        print("[migrate_f5_slug_merge] DRY RUN - no files will be changed")
        print(json.dumps(plan, indent=2, sort_keys=True))
        return 0

    planned = plan["delete_targets"]
    if not planned:
        print("[migrate_f5_slug_merge] already clean (no-op)")
        return 0

    corridor_dir = paths.asset_subject_dir("loc", DRIFTED_LOCATION)
    loc_index = paths.asset_class_dir("loc") / "loc.json"

    removed: list[str] = []
    try:
        for target in (corridor_dir, loc_index):
            rel_target = _rel(target, paths.project_root)
            # Only delete what the plan inventoried and safety-checked.
            if rel_target not in planned:
                continue
            _remove_path(target)
            removed.append(rel_target)
    finally:
        # Persist the receipt on failure too: once a target is gone, a rerun
        # can no longer rebuild its inventory.
        run_now = now if now is not None else _utc_now()
        receipt = {
            **plan,
            "created_at": run_now.isoformat(),
            "deleted_targets": removed,
        }
        receipt_path = _receipt_path(paths, run_now)
        _write_json_atomic(receipt_path, receipt)

    print(f"[migrate_f5_slug_merge] deleted: {removed}")
    print(f"[migrate_f5_slug_merge] wrote receipt: {receipt_path}")
    return 0


def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run the migration.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The exit status produced by ``migrate``.
    """
    parser = argparse.ArgumentParser(
        description="Delete the drifted F5 corridor slug and dead loc.json index."
    )
    option_specs = (
        ("--project", {"required": True}),
        ("--dry-run", {"action": "store_true"}),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    options = parser.parse_args(argv)
    return migrate(options.project, dry_run=options.dry_run)


# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
