#!/usr/bin/env python3
"""rename_torch_to_jade.py — one-off Tartarus character rename.

Replaces 'Torch' (the character) with 'Jade' across in-scope files using
case-preserving lookaround regexes (see PATTERNS) that treat `_` and `-` as
token separators. These will NOT match `Torchlight`, `torches`, `blowtorch`,
`torchbearer`, `torchy` — verified.

In scope (rename happens):
  - projects/tartarus/ (except archive, backups, sessions, deep_fix payloads,
    script_doctor state, batch summaries, revision log, prompt-ab tests,
    .gemini_backup files)
  - recoil/pipeline/data/ (live render manifests under render_manifests/,
    plus sibling data dirs such as camera_tested/)
  - recoil/engine-memory/LEARNINGS.md (single mention)
  - recoil/.claude/CLAUDE.md (if any character mentions)
  - recoil/docs/ (if any)

Out of scope (preserved untouched):
  - consultations/starsend/ (historical consult docs from earlier project state)
  - consultations/recoil/ (historical consult docs — including this rename arch doc)
  - .claude/worktrees/ (scratch git worktrees)
  - projects/leviathan/ (different project)
  - projects/_archive/, projects/starsend-test/, projects/time-enough-for-love/
  - conversation-archive/ (frozen historical chats)
  - flickr_scrape/, scripts/output/, writing-ingestion/ (unrelated 'torch' text)
  - cortex/, comfyui_setup/, labs/scripts/ (unrelated)
  - generation-architecture/, projects/generation-architecture/ (test fixtures)
  - any path containing _archive/, /backups/, .backup, .gemini_backup
  - any path under tartarus/sessions/, tartarus/state/script_doctor_*,
    tartarus/state/batch_*_summary, tartarus/state/deep_fix_*,
    tartarus/state/revision_log, tartarus/tests/prompt-ab/

Derived files (NOT touched here — regenerate via /compile):
  - projects/tartarus/TARTARUS_COMPLETE.fountain
  - projects/tartarus/TARTARUS_WITH_METADATA.fountain
  - projects/tartarus/compiled/tartarus_full.md

Usage:
    python3 recoil/pipeline/tools/rename_torch_to_jade.py
    python3 recoil/pipeline/tools/rename_torch_to_jade.py --apply

Dry-run is the default (there is no --dry-run flag); --apply performs the
rewrite. Add --show-files to list every targeted file.
"""

import argparse
import re
import sys
from pathlib import Path

# Absolute repository root; every scan root and every reported relative path
# is derived from it.
REPO = Path("/Users/joeturnerlin/CLAUDE_PROJECTS")

# Roots to scan (walked recursively; roots that do not exist are skipped).
IN_SCOPE_ROOTS = [
    REPO / "projects" / "tartarus",
    REPO / "recoil" / "pipeline" / "data",  # widened — covers render_manifests/, camera_tested/, etc.
    REPO / "recoil" / "engine-memory",
    REPO / "recoil" / ".claude",
    REPO / "recoil" / "docs",
]

# Substring matches that exclude a path (any match → skip).
# Tested against the full path string, so a fragment can match at any depth.
EXCLUDE_SUBSTRINGS = [
    "/archive/",
    "/_archive/",
    "/backups/",
    ".backup",
    ".gemini_backup",
    "/sessions/",
    "/script_doctor_",
    "/batch_",
    "/deep_fix_",
    "/revision_log",
    "/migration_log",  # historical filesystem migration audit trail
    "/prompt-ab/",
    "/tests/",
    "/.git/",
]

# Filenames to NEVER touch (derived, must be regenerated)
DERIVED_FILES = {
    "TARTARUS_COMPLETE.fountain",
    "TARTARUS_WITH_METADATA.fountain",
    "tartarus_full.md",
}

# Filename patterns to skip (applied with `search` against the bare filename).
SKIP_FILENAME_PATTERNS = [
    re.compile(r"\.pyc$"),
    re.compile(r"\.png$"),
    re.compile(r"\.jpg$"),
    re.compile(r"\.jpeg$"),
    re.compile(r"\.mp4$"),
    re.compile(r"\.webp$"),
    re.compile(r"\.zip$"),
    # Anchored so only real tarballs (`x.tar`, `x.tar.gz`, `x.tar.bz2`, …) are
    # skipped. An unanchored `\.tar` would also swallow any filename that
    # merely contains ".tar" — e.g. `notes.tartarus.md` in this very project.
    re.compile(r"\.tar(\.\w+)?$"),
    re.compile(r"^\.DS_Store$"),
]

# Replacement rules: match `torch` as an *identifier token*, not just a word.
# Using lookaround instead of `\b` so we treat `_` (a word char) as a token
# separator. This handles `torch_phase_1`, `torch_wardrobe_torch_phase_2_…`,
# `path/to/torch/torch_back.png`, etc.
#
# Negative lookbehind: not preceded by alphanum → preserved cases like
#   `blowtorch`, `blowtorch_x` (preceded by `w`, alphanum).
# Negative lookahead: not followed by alpha → preserved cases like
#   `Torchlight`, `torches`, `Torchy`, `torchbearer` (followed by alpha).
#
# The lookaround also matches at `_` and `-` boundaries, so `torch-Echo` →
# `jade-Echo` and `TORCH_S_DEAD_CREW` → `JADE_S_DEAD_CREW`.
#
# Each entry is (compiled pattern, replacement); one rule per casing so the
# replacement keeps the casing of the original token.
PATTERNS = [
    (re.compile(r"(?<![a-zA-Z0-9])TORCH(?![a-zA-Z])"), "JADE"),
    (re.compile(r"(?<![a-zA-Z0-9])Torch(?![a-zA-Z])"), "Jade"),
    (re.compile(r"(?<![a-zA-Z0-9])torch(?![a-zA-Z])"), "jade"),
]


def is_in_scope(path: Path) -> bool:
    """Return True when *path* survives every exclusion rule.

    A path is rejected when its full string form contains any entry of
    EXCLUDE_SUBSTRINGS, when its filename is listed in DERIVED_FILES, or when
    its filename matches any regex in SKIP_FILENAME_PATTERNS.
    """
    full_path = str(path)
    for fragment in EXCLUDE_SUBSTRINGS:
        if fragment in full_path:
            return False

    filename = path.name
    if filename in DERIVED_FILES:
        return False

    # Binary / junk extensions are filtered on the bare filename only.
    return not any(rx.search(filename) for rx in SKIP_FILENAME_PATTERNS)


def find_target_files() -> list[Path]:
    """Collect every in-scope file whose text matches at least one pattern.

    Each existing root in IN_SCOPE_ROOTS is walked recursively. Entries that
    are not regular files, fail `is_in_scope`, or cannot be read as strict
    UTF-8 are skipped silently. Files are returned in traversal order.
    """
    matches: list[Path] = []
    for root in IN_SCOPE_ROOTS:
        if not root.exists():
            continue
        for entry in root.rglob("*"):
            if not (entry.is_file() and is_in_scope(entry)):
                continue
            try:
                content = entry.read_text(encoding="utf-8", errors="strict")
            except (UnicodeDecodeError, IsADirectoryError, OSError):
                # Undecodable (likely binary) or unreadable — not a rename target.
                continue
            if any(regex.search(content) for regex, _ in PATTERNS):
                matches.append(entry)
    return matches


def rename_text(text: str) -> tuple[str, int]:
    """Run every rule in PATTERNS over *text*, in order.

    Returns a `(rewritten_text, replacements)` pair where `replacements` is
    the sum of substitutions made by all rules.
    """
    result = text
    replaced = 0
    for regex, replacement in PATTERNS:
        result, hits = regex.subn(replacement, result)
        replaced += hits
    return result, replaced


def main() -> int:
    """CLI entry point: report the planned rename and optionally apply it.

    Flags:
        --apply       Rewrite the files in place (without it, nothing is written).
        --show-files  Additionally list every targeted file.

    The summary (file count, total replacements, per-directory buckets) is
    always printed. Returns 0 as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description="Rename Tartarus character Torch → Jade",
    )
    parser.add_argument(
        "--apply",
        action="store_true",
        help="Actually perform the rename (default is dry-run)",
    )
    parser.add_argument(
        "--show-files",
        action="store_true",
        help="List every targeted file (otherwise just summary)",
    )
    args = parser.parse_args()

    files = find_target_files()
    print(f"Found {len(files)} in-scope files containing Torch references.")
    print()

    # Dry-run pass: count replacements and group files by their first three
    # path components relative to REPO.
    by_dir: dict[str, list[Path]] = {}
    total_changes = 0
    for f in files:
        text = f.read_text(encoding="utf-8")
        _, count = rename_text(text)
        total_changes += count
        bucket = "/".join(f.relative_to(REPO).parts[:3])
        by_dir.setdefault(bucket, []).append(f)

    print(f"Total replacements: {total_changes}")
    print()
    print("By directory bucket:")
    # Largest buckets first.
    for bucket in sorted(by_dir, key=lambda k: -len(by_dir[k])):
        print(f"  {len(by_dir[bucket]):4d}  {bucket}/")

    if args.show_files:
        print()
        print("All files:")
        for f in sorted(files):
            print(f"  {f.relative_to(REPO)}")

    if not args.apply:
        print()
        print("Dry-run complete. Re-run with --apply to execute.")
        return 0

    print()
    print("Applying rename...")
    written = 0
    applied = 0
    for f in files:
        # newline="" turns off universal-newline translation on both the read
        # and the write, so a CRLF file stays CRLF instead of being silently
        # normalised to the platform line ending by the rewrite.
        with f.open("r", encoding="utf-8", newline="") as src:
            text = src.read()
        new_text, count = rename_text(text)
        if new_text != text:
            with f.open("w", encoding="utf-8", newline="") as dst:
                dst.write(new_text)
            written += 1
            # Count what was actually written rather than reusing the dry-run
            # total, which could be stale if a file changed in between.
            applied += count
    print(f"Rewrote {written} files. {applied} total replacements applied.")
    return 0


# Run the CLI only when executed as a script; propagate main()'s return value
# as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
