#!/usr/bin/env python3
"""Brick B — tartarus reference-substrate data cleanup (A1 follow-on).

One-time migration that brings the tartarus asset tree into the A1 SSOT shape:
archive dead stubs, promote one canonical hero per subject to the shelf via the
merged ``promote_to_canonical`` (hyphen ``{subject}-identity.ext`` at the look
root, round-trip asserted), archive legacy-flat identity files, retire the
REC-129 bogus ``CRYOPOD_PROBE`` dir. Scope is the FOUNDATION_AUDIT.md mud map.

SAFETY (hardened after codex review 2026-06-17):
  * ``--dry-run`` (DEFAULT) prints the plan and mutates NOTHING.
  * ``--apply`` mutates. It writes the archive dir + TOMBSTONE.md FIRST, then
    ARCHIVES (moves) — never deletes our own targets. Every removed file lands
    under ``assets/_archive/ssot_cleanup_<UTC>/`` preserving its original
    relative path. Archive destinations are collision-checked (abort if exists).
  * Before each promote, any pre-existing shelf hero at the look root is archived
    (so ``promote_to_canonical``'s old-hero unlink can never lose data).
  * EP001-critical subjects (WREN, JADE, the SH37 location) promote FIRST and a
    failure on any of them ABORTS; non-critical (other props/locs, kit, varek)
    failures are logged + skipped so one malformed sidecar can't strand the run.
  * Hero overrides are validated: relative, normalized, under assets/, a real file.
  * Post-verify: every promoted subject (char, prop and loc alike) resolves a
    shelf hero; EP001_SH37's cast passes the fail-closed gate. Any failure exits
    non-zero.
"""
from __future__ import annotations

import argparse
import json
import os
import shutil
import sys
from pathlib import Path

# Directory three levels above this file (presumably the ``recoil`` package
# directory — confirm against the repo layout). Its PARENT is put at the front
# of sys.path so the ``from recoil...`` imports inside run() resolve.
RECOIL_ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(RECOIL_ROOT.parent))

# Projects root: taken from $RECOIL_PROJECTS_ROOT when set, otherwise the
# Dropbox default under the current user's home directory.
PROJECTS_ROOT = Path(
    os.environ.get("RECOIL_PROJECTS_ROOT",
                   str(Path.home() / "Dropbox/CLAUDE_DATA/recoil/projects"))
)
# The one project this migration touches, and its asset tree. Every relative
# path in the tables below is resolved against ASSETS.
TARTARUS = PROJECTS_ROOT / "tartarus"
ASSETS = TARTARUS / "assets"

# ── FOUNDATION_AUDIT §2 dead stubs. (The 2 CRYOPOD_PROBE stubs are NOT here —
# they live inside prop/CRYOPOD_PROBE, retired wholesale in step [4].) ──
# Paths are relative to ASSETS; run() step [1] hands each one to _archive(),
# which logs and skips entries that no longer exist.
DEAD_STUBS = [
    "char/jade/base/pool/identity/jade_identity_back_v01.png",
    "char/jade/base/pool/identity/jade_identity_closeup_v01.png",
    "char/jade/base/pool/identity/jade_identity_front_v01.png",
    "char/jade/base/pool/identity/jade_identity_hero_v01.png",
    "char/jade/base/pool/identity/jade_identity_profile_v01.png",
    "char/jade/base/pool/identity/jade_identity_three-quarter_v01.png",
]

# subject → (class, canonical-hero pool relpath). Chars use JT-CONFIRMED
# Seedream-labeled heroes (2026-06-17; NOT the "highest-res" audit auto-pick,
# which mis-chose Jade's v2-2 — the identity-lock research + JT both reject it).
# Props/locs keep the FOUNDATION_AUDIT §6.2 pick (no seedream hero exists).
# Keys are lower-case subject ids (run() upper-cases them for the resolver);
# class is one of "char" / "prop" / "loc"; relpaths are relative to ASSETS.
# run() copies this table and lets the CLI hero overrides replace the relpath.
HEROES = {
    "cryo_pod": ("prop", "prop/cryo_pod/base/pool/identity/cryo_pod_identity_hero_v02.png"),
    "debt_counter": ("prop", "prop/debt_counter/base/pool/identity/debt-counter_prop_hero_v01.png"),
    "int_lower_decks_corridor": ("loc", "loc/int_lower_decks_corridor/base/pool/identity/int-lower-decks-corridor_loc_hero_v01.png"),
    "int_lower_decks_maintenance_shaft": ("loc", "loc/int_lower_decks_maintenance_shaft/base/pool/identity/int-lower-decks-maintenance-shaft_loc_hero_v01.png"),
    "jade": ("char", "char/jade/base/pool/identity/jade_identity_hero-seedream-gray_v01.jpeg"),
    "kit": ("char", "char/kit/base/pool/identity/kit_identity_character-r3-candidate-05-beauty_v01.png"),
    "salvage_hook": ("prop", "prop/salvage_hook/base/pool/identity/salvage-hook_prop_hero_v01.png"),
    "varek": ("char", "char/varek/base/pool/identity/varek_identity_hero-seedream-gray_v01.jpeg"),
    "warden_drone": ("prop", "prop/warden_drone/base/pool/identity/warden_drone_ref.png"),
    "wren": ("char", "char/wren/base/pool/identity/wren_identity_hero-seedream-gray_v01.jpeg"),
}

# Directories (relative to ASSETS) archived wholesale in run() step [4].
BOGUS_DIRS = ["prop/CRYOPOD_PROBE"]
# Character ids (upper-case) resolved and passed to the EP001_SH37 gate in the
# post-verify step.
EP001_CAST = ["WREN", "JADE"]
# EP001_SH37-critical subjects (promote first; failure aborts):
# every entry must also be a key of HEROES — run() indexes heroes[subj] for each.
CRITICAL = ["wren", "jade", "int_lower_decks_maintenance_shaft"]


def _archive_root(stamp: str) -> Path:
    """Return this run's archive directory, ``ASSETS/_archive/ssot_cleanup_<stamp>``.

    Pure path arithmetic: nothing is created on disk here.
    """
    run_dir_name = f"ssot_cleanup_{stamp}"
    return ASSETS.joinpath("_archive", run_dir_name)


def _archive(rel: str, stamp: str, apply: bool, log: list) -> None:
    """Move ``ASSETS/rel`` into this run's archive dir, keeping its relative path.

    Args:
        rel: Path of the file or directory to archive, relative to ``ASSETS``.
        stamp: Archive stamp identifying the run's archive directory.
        apply: When false, only the plan line is logged; nothing is moved.
        log: Plan/progress lines are appended here.

    Raises:
        RuntimeError: The archive destination already exists (checked in
            dry-run as well as apply mode).
    """
    origin = ASSETS / rel
    if origin.exists():
        target = _archive_root(stamp) / rel
        # Never overwrite an earlier archive entry: a collision means a stale
        # stamp or a re-run, so stop before touching anything.
        if target.exists():
            raise RuntimeError(f"archive dest already exists (stale stamp / re-run?): {target}")
        log.append(f"  ARCHIVE: {rel}")
        if not apply:
            return
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(origin), str(target))
    else:
        log.append(f"  SKIP (absent): {rel}")


def _archive_existing_shelf_hero(subject: str, cls: str, stamp: str, apply: bool, log: list) -> None:
    """Archive any identity hero already sitting at the subject's look root.

    Called before each promote so the files ``promote_to_canonical`` would
    otherwise remove are preserved in the archive. Both the hyphen
    (``<subject>-identity.*``) and underscore (``<subject>_identity.*``)
    spellings are collected, hyphen matches first, each group sorted.
    """
    look_root = ASSETS / cls / subject / "base"
    if look_root.exists():
        patterns = (f"{subject}-identity.*", f"{subject}_identity.*")
        # Gather every match up front, before the first move happens.
        candidates = [hit for pattern in patterns for hit in sorted(look_root.glob(pattern))]
        for candidate in candidates:
            if not candidate.is_file():
                continue
            _archive(str(candidate.relative_to(ASSETS)), stamp, apply, log)


def _legacy_flat_identity_files(subject: str, cls: str) -> list[str]:
    """List legacy flat identity files lying directly in the subject directory.

    Returns the ASSETS-relative paths (sorted) of regular files matching
    ``<subject>_identity_*`` in ``ASSETS/<cls>/<subject>``; an empty list when
    that directory does not exist.
    """
    subject_root = ASSETS / cls / subject
    found: list[str] = []
    if subject_root.exists():
        for entry in sorted(subject_root.glob(f"{subject}_identity_*")):
            if entry.is_file():
                found.append(str(entry.relative_to(ASSETS)))
    return found


def _normalize_sidecar(src: Path, apply: bool, log: list) -> None:
    """Coerce a pool sidecar's ``derived_from`` into a dict (targeted, idempotent).

    The sidecar is ``<src name>.json`` next to ``src``. A missing or ``null``
    ``derived_from`` becomes ``{}``; any other non-dict value is preserved as
    ``{"_legacy": <old value>}``. A sidecar whose ``derived_from`` is already a
    dict is left untouched and nothing is logged.

    Sidecars that cannot be normalized (unreadable, invalid JSON, or a top-level
    value that is not a JSON object) are logged and skipped rather than raising:
    this helper is called outside the per-subject promote ``try`` in the caller,
    so an exception here would abort the whole run.

    Args:
        src: The pool asset whose sidecar should be checked.
        apply: When false the planned normalization is only logged.
        log: Plan/progress lines are appended here.
    """
    sc = src.with_suffix(src.suffix + ".json")
    if not sc.exists():
        return
    try:
        data = json.loads(sc.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        log.append(f"  SKIP sidecar normalize (unreadable, {type(e).__name__}): {sc.name}")
        return
    if not isinstance(data, dict):
        # Valid JSON but not an object: there is no derived_from key to repair.
        log.append(
            f"  SKIP sidecar normalize (top-level JSON is {type(data).__name__}, not an object): {sc.name}"
        )
        return
    derived = data.get("derived_from")
    if isinstance(derived, dict):
        return
    log.append(f"  NORMALIZE sidecar derived_from (was {type(derived).__name__}): {sc.name}")
    if apply:
        data["derived_from"] = {} if derived is None else {"_legacy": derived}
        sc.write_text(json.dumps(data, indent=2), encoding="utf-8")


def _validate_override(relpath: str) -> str:
    """Validate a CLI hero override and return it normalized, relative to assets/.

    The override must be a relative path that, once resolved, lies strictly
    inside the resolved ``ASSETS`` directory and names an existing regular file.

    Raises:
        SystemExit: With an explanatory message when any check fails.
    """
    if Path(relpath).is_absolute():
        raise SystemExit(f"override must be relative to assets/: {relpath}")
    assets_root = ASSETS.resolve()
    candidate = (ASSETS / Path(relpath)).resolve()
    # The trailing separator keeps a sibling such as "assets_old" from passing
    # the prefix test.
    inside_prefix = str(assets_root) + os.sep
    if not str(candidate).startswith(inside_prefix):
        raise SystemExit(f"override escapes assets/: {relpath}")
    if not candidate.is_file():
        raise SystemExit(f"override is not an existing file: {relpath}")
    return str(candidate.relative_to(assets_root))


def run(apply: bool, stamp: str, hero_overrides: dict) -> int:
    """Plan (dry-run) or perform (apply) the tartarus SSOT cleanup.

    The accumulated log is printed on EVERY exit path, including an uncaught
    exception (for example the archive-collision ``RuntimeError``), so the
    operator always sees which files had already been moved before the failure.

    Args:
        apply: ``True`` mutates the asset tree; ``False`` only builds the plan.
        stamp: Suffix of the archive dir ``assets/_archive/ssot_cleanup_<stamp>/``.
        hero_overrides: ``{subject: relpath-or-None}``; truthy entries replace the
            ``HEROES`` relpath for that subject after ``_validate_override``.

    Returns:
        0 on success (or a completed dry-run), 3 if a critical hero source is
        missing, 4 if a critical promote raised, 5 if post-verify failed.

    Raises:
        SystemExit: From ``_validate_override`` on a bad override.
        RuntimeError: From ``_archive`` when an archive destination already exists.
        KeyError: If ``hero_overrides`` names a subject that is not in ``HEROES``.
    """
    log: list = []
    try:
        return _run_steps(apply, stamp, hero_overrides, log)
    finally:
        # Runs for normal returns and for propagating exceptions alike.
        if log:
            print("\n".join(log))


def _run_steps(apply: bool, stamp: str, hero_overrides: dict, log: list) -> int:
    """Execute cleanup steps [1]-[5], appending to ``log``; return the exit code."""
    # Imported lazily: these live in the project package, which is importable
    # only after the module-level sys.path adjustment.
    from recoil.workspace.sidecar import promote_to_canonical
    from recoil.core.paths import ProjectPaths
    from recoil.core.ref_resolver import resolve_reference_bundle

    heroes = dict(HEROES)
    for subj, path in hero_overrides.items():
        if path:
            heroes[subj] = (heroes[subj][0], _validate_override(path))

    mode = "APPLY" if apply else "DRY-RUN"
    log.append(f"=== Brick B — tartarus SSOT cleanup [{mode}] ===")
    log.append(f"projects_root: {PROJECTS_ROOT}")
    log.append(f"archive dir:   assets/_archive/ssot_cleanup_{stamp}/\n")

    # Tombstone FIRST (apply) so any early abort still leaves a restore note.
    if apply:
        tomb = _archive_root(stamp) / "TOMBSTONE.md"
        tomb.parent.mkdir(parents=True, exist_ok=True)
        tomb.write_text(
            f"# SSOT cleanup {stamp}\n\nBrick B (A1 follow-on). Archived dead stubs + "
            f"legacy-flat identity files + pre-existing shelf heroes + the REC-129 "
            f"CRYOPOD_PROBE dir; promoted heroes to the shelf via promote_to_canonical. "
            f"Restore by moving a path back to its original location under assets/. "
            f"If the run aborted mid-way, this dir holds whatever was archived before "
            f"the abort. See consultations/recoil/asset-ref-ssot-coherence-2026-06-17/"
            f"FOUNDATION_AUDIT.md.\n", encoding="utf-8")

    # 1. Archive dead stubs first (clean pool before reconcile).
    log.append(f"[1] Archive {len(DEAD_STUBS)} dead stubs:")
    for rel in DEAD_STUBS:
        _archive(rel, stamp, apply, log)

    # 2. Promote heroes — CRITICAL first; per-subject isolation.
    order = CRITICAL + [s for s in sorted(heroes) if s not in CRITICAL]
    log.append(f"\n[2] Promote {len(heroes)} heroes (CRITICAL first; promote_to_canonical):")
    promoted, promote_failures = [], []
    for subj in order:
        cls, relpath = heroes[subj]
        src = ASSETS / relpath
        if not src.is_file():
            msg = f"{subj}: hero source missing/not-a-file: {relpath}"
            if subj in CRITICAL:
                log.append(f"  ABORT (critical) — {msg}")
                return 3
            promote_failures.append(msg)
            log.append(f"  SKIP (non-critical) — {msg}")
            continue
        tag = "CRITICAL" if subj in CRITICAL else "aux"
        log.append(f"  PROMOTE [{tag}]: {subj} ({cls})  <-  {relpath}")
        # Preserve whatever hero is on the shelf now, then repair the sidecar,
        # before handing the source to promote_to_canonical.
        _archive_existing_shelf_hero(subj, cls, stamp, apply, log)
        _normalize_sidecar(src, apply, log)
        if apply:
            try:
                promote_to_canonical(src, cls, subj, TARTARUS, kind="identity")
                promoted.append((subj, cls))
            except Exception as e:
                msg = f"{subj}: promote raised {type(e).__name__}: {e}"
                if subj in CRITICAL:
                    log.append(f"  ABORT (critical) — {msg}")
                    return 4
                promote_failures.append(msg)
                log.append(f"  SKIP (non-critical) — {msg}")
        else:
            # Dry-run: count the subject as promoted so the plan is complete.
            promoted.append((subj, cls))

    # 3. Archive legacy-flat identity files for char subjects (post-promote).
    # NOTE(review): iterates every char in `heroes`, including chars whose
    # promote was skipped above — confirm that is intended.
    log.append("\n[3] Archive legacy-flat identity files (char subjects):")
    for subj in sorted(heroes):
        cls = heroes[subj][0]
        if cls != "char":
            continue
        for rel in _legacy_flat_identity_files(subj, cls):
            _archive(rel, stamp, apply, log)

    # 4. Retire bogus dirs.
    log.append("\n[4] Retire REC-129 bogus dirs:")
    for rel in BOGUS_DIRS:
        _archive(rel, stamp, apply, log)

    # 5. Verify.
    log.append("\n[5] Verify:")
    if not apply:
        log.append("  (dry-run — skipped; --apply runs resolver + EP001_SH37 gate verification)")
        if promote_failures:
            log.append("  NOTE non-critical promote concerns: " + "; ".join(promote_failures))
        return 0

    pp = ProjectPaths.from_root(TARTARUS)
    failures = list(promote_failures)
    # Verify EVERY promoted subject (all classes, incl. the EP001-critical loc)
    # resolves a shelf hero — not just chars (codex CRITICAL).
    for subj, cls in promoted:
        try:
            bundle = resolve_reference_bundle(pp, cls, subj.upper(), "identity")
            hh = [a for a in bundle.by_role("identity") if getattr(a, "is_hero", False)]
        except Exception as e:
            # Record and keep going, like the gate check below, so one resolver
            # error does not hide the verification result of the other subjects.
            failures.append(f"{subj} ({cls}): resolver raised {type(e).__name__}: {e}")
            continue
        if not hh:
            failures.append(f"{subj} ({cls}): no shelf hero resolved post-promote")
        elif hh[0].source != "shelf":
            failures.append(f"{subj} ({cls}): hero from {hh[0].source}, not shelf")
        else:
            log.append(f"  OK: {subj} ({cls}) -> shelf hero {hh[0].path.name}")

    # EP001_SH37 gate proof — resolve WREN/JADE WITH their real wardrobe phases
    # (the live collector passes phase; this proves A1's phase-agnostic retry on
    # the real cast, not just an unphased hand-built bundle). codex MAJOR.
    EP001_PHASES = {"WREN": "wren_phase_1_pure_function", "JADE": "jade_phase_1_full_mask"}
    try:
        from recoil.core.ref_gate import assert_refs_complete
        from recoil.core.ref_types import ReferenceBundle
        assets = []
        for cid in EP001_CAST:
            b = resolve_reference_bundle(pp, "char", cid, "identity", phase=EP001_PHASES[cid])
            assets.extend(b.by_role("identity"))
        assert_refs_complete(shot_id="EP001_SH37", required_subjects=EP001_CAST,
                             bundle=ReferenceBundle(tuple(assets)),
                             board_gated=False, board_ref_path=None)
        log.append("  OK: EP001_SH37 fail-closed gate PASSES for [WREN, JADE] (with real wardrobe phases)")
    except Exception as e:
        failures.append(f"EP001_SH37 gate still blocks: {type(e).__name__}: {e}")

    if failures:
        log.append("\nVERIFY FAILED:")
        for f in failures:
            log.append(f"  ✗ {f}")
        return 5
    log.append("\n✅ VERIFY PASSED — shelf heroes resolve; EP001_SH37 gate green.")
    return 0


def main():
    """Parse the CLI, pick the archive stamp, and exit with ``run()``'s code.

    Exits with status 2 when the tartarus project directory does not exist.
    Without ``--stamp`` the stamp is the current UTC time formatted as
    ``YYYYmmddTHHMMSSZ``.
    """
    ap = argparse.ArgumentParser(description="Brick B — tartarus SSOT data cleanup")
    ap.add_argument("--apply", action="store_true", help="perform the mutation (default: dry-run)")
    ap.add_argument("--stamp", default=None, help="archive stamp (default: fresh UTC timestamp)")
    ap.add_argument("--wren-hero", default=None, help="override WREN canonical hero (rel path under assets/)")
    ap.add_argument("--jade-hero", default=None, help="override JADE canonical hero (rel path under assets/)")
    args = ap.parse_args()
    if not TARTARUS.exists():
        print(f"FATAL: tartarus project not found at {TARTARUS}", file=sys.stderr)
        sys.exit(2)
    stamp = args.stamp
    if not stamp:
        # Computed in-process: no dependency on an external `date` binary, and
        # no risk of an empty stamp when that command is missing or fails.
        from datetime import datetime, timezone
        stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    overrides = {"wren": args.wren_hero, "jade": args.jade_hero}
    sys.exit(run(args.apply, stamp, overrides))


if __name__ == "__main__":
    main()
