#!/bin/bash
# worktree_reaper.sh — reap COMPLETED git worktrees safely (REC-228 follow-on).
#
# Worktrees accumulate: session_workspace.sh + the dispatch chassis create one per
# build, and on CAP/land the chassis deliberately KEEPS the worktree ("worktree-kept").
# Nothing reaped them, so ~20 piled up (2026-06-21). This is the missing reaper.
#
# PROVABLY-SAFE rule — a worktree is reaped ONLY when BOTH hold:
#   1. clean      — zero uncommitted/untracked changes, AND
#   2. merged     — HEAD is an ancestor of origin/main (every commit already landed).
# That combination guarantees zero data loss: nothing uncommitted, nothing unmerged.
#
# NEVER touched (reported as KEEP):
#   - the primary checkout ($REPO)
#   - any worktree ahead of origin/main (unmerged commits — branch-backed OR detached;
#     removing a detached-ahead worktree would orphan commits, so we refuse)
#   - any DIRTY worktree (uncommitted work — even if merged; a human decides)
# Removing a worktree never deletes its branch ref, so merged branch-backed worktrees
# lose only the working directory.
#
# TIER 2 (opt-in, --include-merged-prs): a branch whose PR is MERGED on GitHub has
# landed even when squash-merge means its commits are not ancestors of main. Those
# worktrees show as "ahead+dirty" (pre-squash commits + harness build-log scribbles)
# and tier 1 keeps them forever. Tier 2 reaps them via the authoritative signal —
# `gh pr list --head <branch> --state merged` non-empty — force-removing the junk
# working dir. Requires `gh`; needs --apply to actually remove; never runs by default
# (the daily launchd job is tier-1-only, so a still-open PR is never touched).
#
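# Default is DRY-RUN (prints WOULD-REAP / WOULD-REAP-PR / KEEP / STALE-WIP). STALE-WIP
# is report-only: a dirty worktree with no live session lease whose newest file is older
# than STALE_WIP_MTIME_SEC — it is still kept. Pass --apply to actually remove.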
# Machine-agnostic; logs to ~/.recoil-worktree-reaper.log. Safe to run from launchd.
# -e is deliberately not enabled: every failure below is handled explicitly with `||`.
set -uo pipefail
REPO="${CANONICAL_REPO:-$HOME/CLAUDE_PROJECTS}"
LOG="$HOME/.recoil-worktree-reaper.log"
# Tunables (seconds), overridable from the environment.
LIVE_WINDOW_SEC="${LIVE_WINDOW_SEC:-600}"
STALE_WIP_MTIME_SEC="${STALE_WIP_MTIME_SEC:-7200}"
APPLY=0; MERGED_PRS=0
for a in "$@"; do
  case "$a" in
    --apply) APPLY=1;;
    --include-merged-prs) MERGED_PRS=1;;
    # A typo'd flag (e.g. --aply) would otherwise silently degrade to a dry run.
    *) echo "worktree_reaper: ignoring unknown argument: $a" >&2;;
  esac
done
TS="$(date '+%Y-%m-%dT%H:%M:%S')"
cd "$REPO" 2>/dev/null || { echo "$TS no-repo at $REPO" >> "$LOG"; exit 0; }
# Canonicalize REPO (resolve symlinks, drop any trailing slash) so the later
# "is this the primary checkout?" comparison against the paths printed by
# `git worktree list` is not defeated by an alternate spelling of the same directory.
repo_physical="$(pwd -P 2>/dev/null)" && [ -n "$repo_physical" ] && REPO="$repo_physical"
# Repo slug for gh (tier 2). Derive from origin so it's machine-agnostic.
# Accepts scp-style, https:// and ssh:// GitHub remotes; anything else is left as-is,
# which makes the later gh call fail and tier 2 skip the worktree (conservative).
REPO_SLUG="$(git -C "$REPO" remote get-url origin 2>/dev/null \
  | sed -E 's#^(git@github\.com:|https://github\.com/|ssh://git@github\.com/)##; s#/$##; s#\.git$##')"

# Fresh origin/main so "merged" is accurate; tolerate offline (stale ref, conservative).
git fetch origin --quiet 2>>"$LOG" || echo "$TS fetch-failed (using stale origin/main)" >> "$LOG"
# Drop admin entries for worktree dirs that were already deleted from disk.
git worktree prune 2>>"$LOG"

# lease_is_live WORKTREE
#   Prints 1 when WORKTREE/.session-lease.json carries a "heartbeat" no older than
#   LIVE_WINDOW_SEC seconds, otherwise prints 0. The heartbeat may be an integer-like
#   epoch value or a UTC "YYYY-MM-DDTHH:MM:SSZ" string. A missing/unreadable/invalid
#   lease file, or a heartbeat in neither form, counts as not live.
lease_is_live() {
  local lease_file="$1/.session-lease.json"
  python3 - "$lease_file" "$LIVE_WINDOW_SEC" <<'PY'
import calendar
import json
import sys
import time

lease_path = sys.argv[1]
raw_window = sys.argv[2]


def to_epoch(raw):
    """Epoch seconds from an int-like value or a UTC '...Z' timestamp; 0 if neither."""
    converters = (
        int,
        lambda text: calendar.timegm(time.strptime(text, "%Y-%m-%dT%H:%M:%SZ")),
    )
    for convert in converters:
        try:
            return convert(raw)
        except Exception:
            continue
    return 0


# A non-numeric window collapses to 0 seconds.
try:
    max_age = int(raw_window)
except Exception:
    max_age = 0

# Any failure reading or decoding the lease means "no heartbeat".
beat = 0
try:
    with open(lease_path, "r", encoding="utf-8") as fh:
        beat = to_epoch(json.load(fh).get("heartbeat", ""))
except Exception:
    beat = 0

is_live = beat > 0 and time.time() - beat <= max_age
print(1 if is_live else 0)
PY
}

# newest_work_file_age WORKTREE
#   Prints the age in whole seconds of the most recently modified file among the
#   tracked and untracked-but-not-ignored files that `git ls-files -co
#   --exclude-standard` reports for WORKTREE. Prints 0 when the listing cannot be
#   obtained or when no listed file yields a positive mtime.
newest_work_file_age() {
  python3 - "$1" <<'PY'
import os
import subprocess
import sys
import time

root = sys.argv[1]
listing_cmd = ["git", "-C", root, "ls-files", "-co", "--exclude-standard", "-z"]

try:
    listing = subprocess.check_output(listing_cmd, stderr=subprocess.DEVNULL)
except Exception:
    # git missing, not a repository, etc.
    print(0)
    sys.exit(0)


def mtimes(entries):
    """Yield the mtime of every listed path that can be stat'ed."""
    for entry in entries:
        if not entry:
            continue
        rel = entry.decode("utf-8", "surrogateescape")
        try:
            yield os.stat(os.path.join(root, rel)).st_mtime
        except OSError:
            # Listed but not stat-able (e.g. deleted from disk): ignore it.
            continue


# Seeding with 0.0 means non-positive mtimes never win.
latest = max([0.0, *mtimes(listing.split(b"\0"))])

if latest:
    print(int(max(0, time.time() - latest)))
else:
    print(0)
PY
}

# Reaper state: running counters for the summary line, plus the worktree record
# ($wt = path, $br = branch label) currently being accumulated from the porcelain list.
reaped=0; kept=0; stale_wip=0; wt=""; br=""

# flush — classify the pending worktree record ($wt/$br), reap it when allowed, then
# clear the record. No-op when no record is pending.
#   Globals read:    REPO APPLY MERGED_PRS REPO_SLUG LOG TS STALE_WIP_MTIME_SEC
#   Globals written: wt br (always cleared); reaped kept stale_wip (counters)
#   Output:          one status line per worktree on stdout; reap results appended to $LOG
# Decision order:
#   1. primary checkout                         -> skipped silently
#   2. clean AND HEAD is ancestor of origin/main -> WOULD-REAP, or REAPED/FAILED with --apply
#   3. tier 2 (opt-in): named branch with a MERGED PR -> WOULD-REAP-PR, or forced removal
#   4. dirty, no live lease, newest file older than STALE_WIP_MTIME_SEC -> STALE-WIP (kept)
#   5. anything else                            -> KEEP
flush() {
  [ -z "$wt" ] && return
  # Never touch the primary checkout. -ef also catches a differently spelled path to the
  # same directory (symlink, trailing slash) that a plain string compare would miss.
  if [ "$wt" = "$REPO" ] || [ "$wt" -ef "$REPO" ]; then wt=""; br=""; return; fi
  local dirty merged status_out
  # Count porcelain lines only when `git status` itself succeeded. A failed status must
  # not read as "0 changes": unknown is treated as dirty so the worktree is kept.
  if status_out=$(git -C "$wt" status --porcelain --untracked-files=all 2>/dev/null); then
    dirty=$(printf '%s' "$status_out" | grep -c . | tr -d ' ')
  else
    dirty="unknown"
  fi
  if git -C "$wt" merge-base --is-ancestor HEAD origin/main 2>/dev/null; then merged=1; else merged=0; fi
  if [ "$merged" = 1 ] && [ "${dirty:-1}" = 0 ]; then
    # Tier 1: nothing uncommitted, nothing unmerged. Non-forced removal, so git applies
    # its own cleanliness check as well.
    if [ "$APPLY" = 1 ]; then
      if git worktree remove "$wt" 2>>"$LOG"; then
        echo "$TS REAPED $wt [$br]" >> "$LOG"; echo "REAPED      $wt  [$br]"; reaped=$((reaped+1))
      else
        echo "$TS reap-FAILED $wt [$br]" >> "$LOG"; echo "FAILED      $wt  [$br]"
      fi
    else
      echo "WOULD-REAP  $wt  [$br]"
    fi
  else
    # Tier 2: branch-backed worktree whose PR is MERGED on GitHub → landed (squash). Reap.
    # The branch name must be non-empty: a record with no branch/detached line would
    # otherwise query gh with an empty --head, which does not restrict the result to
    # this worktree's branch.
    # NOTE(review): the match is by branch name only; a branch reused after its PR merged
    # would still match — confirm that is acceptable for this opt-in tier.
    if [ "$MERGED_PRS" = 1 ] && [ -n "$br" ] && [ "$br" != "(detached)" ] && [ -n "$REPO_SLUG" ] && command -v gh >/dev/null 2>&1; then
      local nmerged
      nmerged=$(gh pr list -R "$REPO_SLUG" --head "$br" --state merged --json number --jq 'length' 2>/dev/null || echo 0)
      # 2>/dev/null: a non-numeric answer makes the test error out, i.e. evaluate false.
      if [ "${nmerged:-0}" -gt 0 ] 2>/dev/null; then
        if [ "$APPLY" = 1 ]; then
          if git worktree remove --force "$wt" 2>>"$LOG"; then
            echo "$TS REAPED(merged-PR) $wt [$br]" >> "$LOG"; echo "REAPED(PR)  $wt  [$br]"; reaped=$((reaped+1))
          else
            echo "$TS reap-FAILED $wt [$br]" >> "$LOG"; echo "FAILED      $wt  [$br]"
          fi
        else
          echo "WOULD-REAP-PR  $wt  [$br]  (merged PR — would force-remove dirty/ahead junk)"
        fi
        wt=""; br=""; return
      fi
    fi
    # Kept: build the human-readable reason.
    local reason=""
    [ "$merged" = 0 ] && reason="unmerged(ahead-of-main)"
    [ "${dirty:-1}" != 0 ] && reason="${reason:+$reason,}dirty=$dirty"
    if [ "${dirty:-1}" != 0 ]; then
      # Dirty worktrees are additionally flagged when they look abandoned: no live
      # session lease and no file touched within STALE_WIP_MTIME_SEC. Still kept.
      local lease_live mtime_age
      lease_live="$(lease_is_live "$wt")"
      mtime_age="$(newest_work_file_age "$wt")"
      if [ "${lease_live:-0}" != 1 ] && [ "${mtime_age:-0}" -gt "$STALE_WIP_MTIME_SEC" ] 2>/dev/null; then
        echo "STALE-WIP   $wt  [$br]  ($reason, no-live-lease, mtime>${mtime_age}s)"; kept=$((kept+1)); stale_wip=$((stale_wip+1))
        wt=""; br=""; return
      fi
    fi
    echo "KEEP        $wt  [$br]  ($reason)"; kept=$((kept+1))
  fi
  wt=""; br=""
}

# Walk `git worktree list --porcelain`. Each record opens with a "worktree <path>" line,
# so seeing one means the previous record is complete and can be flushed. The trailing
# flush handles the last record, which has no successor to trigger it.
while IFS= read -r entry; do
  if [[ $entry == "worktree "* ]]; then
    flush
    wt="${entry#worktree }"
  elif [[ $entry == "branch refs/heads/"* ]]; then
    br="${entry#branch refs/heads/}"
  elif [[ $entry == "detached" ]]; then
    br="(detached)"
  fi
done < <(git worktree list --porcelain)
flush

# Record the run in the log, cap the log at its most recent 500 lines, then print the
# same counters for whoever is watching the terminal.
summary_counts="reaped=$reaped kept=$kept stale_wip=$stale_wip"
printf '%s\n' "$TS summary: $summary_counts apply=$APPLY" >> "$LOG"
if tail -n 500 "$LOG" > "$LOG.tmp" 2>/dev/null; then
  mv "$LOG.tmp" "$LOG"
fi
printf '\n'
printf '%s\n' "reaper: $summary_counts (apply=$APPLY)  log=$LOG"
