#!/usr/bin/env bash
set -euo pipefail

# Locate this script on disk and derive the repository root, which sits four
# directory levels above the script's own directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../../.." && pwd)"

# Put the repository root first on PYTHONPATH so the embedded Python snippets
# can import the `recoil` package; any pre-existing entries are kept after it.
if [ -n "${PYTHONPATH:-}" ]; then
  PYTHONPATH="${REPO_ROOT}:${PYTHONPATH}"
else
  PYTHONPATH="${REPO_ROOT}"
fi
export PYTHONPATH

# Command-line inputs; they stay empty until the option parser fills them in.
RUN_ID=
ISSUE_ID=
WORKTREE_ARG=
SHADOW=
# Seconds allowed for spec generation; AUTONOMY_SPEC_GEN_TIMEOUT_SECONDS
# overrides the 1200 second default.
SPEC_GEN_TIMEOUT_SECONDS="${AUTONOMY_SPEC_GEN_TIMEOUT_SECONDS:-1200}"

# Print the one-line command synopsis to stderr.
usage() {
  printf '%s\n' "usage: supervisor.sh --run-id ID --issue ISSUE --worktree PATH [--shadow tier0|tier1]" >&2
}

# Parse command-line options.
#   --run-id ID, --issue ISSUE, --worktree PATH  required, each takes a value
#   --shadow tier0|tier1                         optional shadow mode
#   -h, --help                                   print usage and exit 0
# Any unknown argument, invalid shadow tier, or missing value prints the usage
# line and exits with status 2.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --run-id|--issue|--worktree|--shadow)
      # A value-taking option must be followed by its value. Without this
      # check `shift 2` fails when the option is the last argument, and under
      # `set -e` the script would die with status 1 and no usage message.
      if [ "$#" -lt 2 ]; then
        usage
        exit 2
      fi
      case "$1" in
        --run-id)
          RUN_ID="$2"
          ;;
        --issue)
          ISSUE_ID="$2"
          ;;
        --worktree)
          WORKTREE_ARG="$2"
          ;;
        --shadow)
          SHADOW="$2"
          case "$SHADOW" in
            tier0|tier1) ;;
            *) usage; exit 2 ;;
          esac
          ;;
      esac
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      usage
      exit 2
      ;;
  esac
done

# All three identifying options are mandatory (an explicitly empty value
# counts as missing).
if [ -z "$RUN_ID" ] || [ -z "$ISSUE_ID" ] || [ -z "$WORKTREE_ARG" ]; then
  usage
  exit 2
fi
# Expose the shadow tier (possibly empty) to child processes.
export AUTONOMY_SUPERVISOR_SHADOW="$SHADOW"

# Print the absolute, normalised form of a path.
# Arguments: $1 - path to resolve; `~` is expanded and the path does not need
#                 to exist (non-strict resolution).
# Outputs:   the resolved path on stdout.
resolve_path() {
  local raw_path="$1"
  python3 - "$raw_path" <<'PY'
import pathlib
import sys

target = pathlib.Path(sys.argv[1]).expanduser()
print(target.resolve(strict=False))
PY
}

# Print the build constants the supervisor needs, one per line, in this order:
#   1. resolved constants.WORKTREES_ROOT
#   2. resolved ~/CLAUDE_PROJECTS
#   3. constants.BUILD_LEASE_TTL
#   4. constants.BUILD_LEASE_HEARTBEAT
#   5. constants.BUILD_WALLCLOCK_SECONDS
# The values come from the project's autonomy `constants` module.
load_build_constants() {
  python3 - <<'PY'
import pathlib

from recoil.pipeline.tools.autonomy import constants as build_constants

print(build_constants.WORKTREES_ROOT.expanduser().resolve(strict=False))
print((pathlib.Path.home() / "CLAUDE_PROJECTS").expanduser().resolve(strict=False))
for name in ("BUILD_LEASE_TTL", "BUILD_LEASE_HEARTBEAT", "BUILD_WALLCLOCK_SECONDS"):
    print(getattr(build_constants, name))
PY
}

# Redact secrets from a text file in place.
# Arguments: $1 - file to scrub; a missing or non-regular file is a no-op.
# Replaces with "[REDACTED]":
#   * the value of every environment variable whose name contains TOKEN, KEY,
#     SECRET or PASSWORD and whose value is at least 8 characters long;
#   * strings shaped like lin_api_*, sk-* and gh[pousr]_* tokens;
#   * the credential following an "Authorization:" header (the scheme word
#     itself is preserved when it is a recognised one).
# Returns:   0 when the file is absent, otherwise the status of python3.
redact_file() {
  local path="$1"
  [ -f "$path" ] || return 0
  python3 - "$path" <<'PY'
from pathlib import Path
import os
import re
import sys

SENSITIVE_NAME_PARTS = ("TOKEN", "KEY", "SECRET", "PASSWORD")

# A token must not start in the middle of an alphanumeric run; otherwise
# ordinary words such as "task-force" or "risk-free" would be mangled by the
# "sk-" pattern. An underscore or punctuation before the token is still allowed.
TOKEN_START = r"(?<![A-Za-z0-9])"

# (pattern, replacement) pairs applied in order.
PATTERNS = (
    (TOKEN_START + r"lin_api_[A-Za-z0-9_=-]+", "[REDACTED]"),
    (TOKEN_START + r"sk-[A-Za-z0-9_-]+", "[REDACTED]"),
    (TOKEN_START + r"gh[pousr]_[A-Za-z0-9_]+", "[REDACTED]"),
    (
        # Keep "Authorization: <scheme> " and redact the credential. Schemes
        # other than Bearer are recognised too, so "Authorization: Basic xyz"
        # no longer redacts the word "Basic" while leaving the credential.
        # Only spaces/tabs are skipped so the match never runs onto the next line.
        r"(?i)(authorization:[ \t]*(?:(?:bearer|basic|token|digest|negotiate)[ \t]+)?)[^\s]+",
        r"\1[REDACTED]",
    ),
)

path = Path(sys.argv[1])
text = path.read_text(encoding="utf-8", errors="replace")

secrets = {
    value
    for key, value in os.environ.items()
    if len(value) >= 8 and any(part in key.upper() for part in SENSITIVE_NAME_PARTS)
}
# Longest first: if one secret is a prefix of another, replacing the short one
# first would leave the tail of the long one in the file.
for secret in sorted(secrets, key=lambda item: (-len(item), item)):
    text = text.replace(secret, "[REDACTED]")

for pattern, replacement in PATTERNS:
    text = re.sub(pattern, replacement, text)
path.write_text(text, encoding="utf-8")
PY
}

# Emit an autonomy event through the project's `events` module.
# Globals:   RUN_ID, ISSUE_ID, WORKTREE (read)
# Arguments: $1 - event type
#            $2 - optional reason; only attached when non-empty
#            $3 - optional PR URL; only attached when non-empty
# The worktree path is always attached. When AUTONOMY_EVENTS_LEDGER is set its
# value is passed as the ledger path, otherwise the ledger argument is None.
emit_event() {
  local kind="$1"
  local why="${2:-}"
  local pull_request="${3:-}"
  python3 - "$RUN_ID" "$ISSUE_ID" "$kind" "$why" "$pull_request" "$WORKTREE" <<'PY'
import os
import pathlib
import sys

from recoil.pipeline.tools.autonomy import constants, events

run_id, issue_id, event_type, reason, pr_url, worktree_path = sys.argv[1:7]

ledger_override = os.environ.get("AUTONOMY_EVENTS_LEDGER")

extra = {"worktree_path": worktree_path}
if reason:
    extra["reason"] = reason
if pr_url:
    extra["pr_url"] = pr_url

events.emit(
    event_type,
    run_id=run_id,
    issue_id=issue_id,
    night_id=constants.current_night_id(),
    ledger=pathlib.Path(ledger_override) if ledger_override else None,
    **extra,
)
PY
}

# Emit a "build_killed" event.
# Arguments: $1 - reason recorded on the event
emit_build_killed() {
  local kill_reason="$1"
  emit_event build_killed "$kill_reason"
}

# Release this run's claim in the claim ledger.
# Globals:   ISSUE_ID, RUN_ID (read)
# Arguments: $1 - state to record on release
#            $2 - optional failure signature; empty is passed as None
# The release is first attempted with ISSUE_ID as given. If that reports
# failure, the ledger is searched newest-first for an active record of this
# run whose issue_identifier equals ISSUE_ID, and the release is retried with
# that record's issue_id. A missing ledger file is not an error.
release_claim() {
  local state="$1"
  local signature="${2:-}"
  python3 - "$ISSUE_ID" "$RUN_ID" "$state" "$signature" <<'PY'
import json
import sys

from recoil.pipeline.tools.autonomy import claim_ledger

issue_arg, run_id, state, signature = sys.argv[1:5]
failure_signature = signature if signature else None


def active_claim_by_identifier():
    """Return the newest active record for this run and identifier, or None."""
    try:
        lines = claim_ledger.CLAIM_LEDGER.read_text(encoding="utf-8").splitlines()
    except FileNotFoundError:
        return None
    for raw in reversed(lines):
        if not raw.strip():
            continue
        entry = json.loads(raw)
        if (
            entry.get("state") == claim_ledger.ACTIVE_STATE
            and entry.get("run_id") == run_id
            and entry.get("issue_identifier") == issue_arg
        ):
            return entry
    return None


if not claim_ledger.release(issue_arg, run_id, state, failure_signature=failure_signature):
    claim = active_claim_by_identifier()
    if claim is not None:
        claim_ledger.release(
            str(claim.get("issue_id")),
            run_id,
            state,
            failure_signature=failure_signature,
        )
PY
}

# Check whether the claim ledger holds an active claim for this run and issue.
# Globals:   ISSUE_ID, RUN_ID (read)
# Returns:   0 when a record in the active state matches RUN_ID and has
#            ISSUE_ID as either its issue_id or its issue_identifier;
#            1 when there is no such record or the ledger file is missing.
active_claim_exists() {
  python3 - "$ISSUE_ID" "$RUN_ID" <<'PY'
import json
import sys

from recoil.pipeline.tools.autonomy import claim_ledger

issue_arg, run_id = sys.argv[1:3]

try:
    ledger_text = claim_ledger.CLAIM_LEDGER.read_text(encoding="utf-8")
except FileNotFoundError:
    sys.exit(1)


def is_match(entry):
    if entry.get("state") != claim_ledger.ACTIVE_STATE:
        return False
    if entry.get("run_id") != run_id:
        return False
    return entry.get("issue_id") == issue_arg or entry.get("issue_identifier") == issue_arg


# Newest records first; the generator stops parsing at the first match.
found = any(
    is_match(json.loads(raw))
    for raw in reversed(ledger_text.splitlines())
    if raw.strip()
)
sys.exit(0 if found else 1)
PY
}

# Send a status text for the issue through the project's `linear_client`.
# Globals:   ISSUE_ID, RUN_ID (read)
# Arguments: $1 - status text; secrets are redacted before it is sent
# If the claim ledger has a record for this run whose issue_identifier equals
# ISSUE_ID and which carries an issue_id, that issue_id is used; otherwise
# ISSUE_ID is passed through unchanged.
project_status() {
  local text="$1"
  python3 - "$ISSUE_ID" "$RUN_ID" "$text" <<'PY'
import json
import os
import re
import sys

from recoil.pipeline.tools.autonomy import claim_ledger, linear_client

issue_arg, run_id, text = sys.argv[1:4]

SECRET_NAME_PARTS = ("TOKEN", "KEY", "SECRET", "PASSWORD")
TOKEN_PATTERNS = (
    r"lin_api_[A-Za-z0-9_=-]+",
    r"sk-[A-Za-z0-9_-]+",
    r"gh[pousr]_[A-Za-z0-9_]+",
)


def redact(value: str) -> str:
    """Replace secret-looking environment values and known token shapes."""
    for name, secret in os.environ.items():
        if not secret or len(secret) < 8:
            continue
        if any(part in name.upper() for part in SECRET_NAME_PARTS):
            value = value.replace(secret, "[REDACTED]")
    for pattern in TOKEN_PATTERNS:
        value = re.sub(pattern, "[REDACTED]", value)
    return value


def resolve_issue_id() -> str:
    """Map the identifier to the ledger's issue_id, newest record first."""
    try:
        lines = claim_ledger.CLAIM_LEDGER.read_text(encoding="utf-8").splitlines()
    except FileNotFoundError:
        return issue_arg
    for raw in reversed(lines):
        if not raw.strip():
            continue
        entry = json.loads(raw)
        if (
            entry.get("run_id") == run_id
            and entry.get("issue_identifier") == issue_arg
            and entry.get("issue_id")
        ):
            return str(entry["issue_id"])
    return issue_arg


linear_client.project_status(resolve_issue_id(), redact(text))
PY
}

# Print the prompt used to generate BUILD_SPEC.md for this issue.
# Globals:   ISSUE_ID, RUN_ID (read)
# Outputs:   the prompt text on stdout.
# The claim ledger is searched newest-first for a record of this run matching
# ISSUE_ID (as issue_id or issue_identifier) to obtain both forms of the id.
# The issue body comes from AUTONOMY_SUPERVISOR_ISSUE_BODY when that is set and
# non-empty; otherwise title, body and URL are requested from `linear_client`,
# and a fetch error is reported inside the body text instead of failing.
build_spec_prompt() {
  python3 - "$ISSUE_ID" "$RUN_ID" <<'PY'
import json
import os
import sys

from recoil.pipeline.tools.autonomy import claim_ledger, linear_client

issue_arg, run_id = sys.argv[1:3]


def lookup_claim():
    """Return the newest ledger record for this run and issue, or None."""
    try:
        lines = claim_ledger.CLAIM_LEDGER.read_text(encoding="utf-8").splitlines()
    except FileNotFoundError:
        return None
    for raw in reversed(lines):
        if not raw.strip():
            continue
        entry = json.loads(raw)
        if entry.get("run_id") != run_id:
            continue
        if entry.get("issue_id") == issue_arg or entry.get("issue_identifier") == issue_arg:
            return entry
    return None


title = ""
url = ""
body = os.environ.get("AUTONOMY_SUPERVISOR_ISSUE_BODY", "")

issue_id = issue_arg
claim = lookup_claim()
if claim is not None:
    # Both fall back to the original command-line value when the field is absent.
    issue_id = str(claim.get("issue_id") or issue_arg)
    issue_arg = str(claim.get("issue_identifier") or issue_arg)

if not body:
    try:
        issue = linear_client.get_issue(issue_id)
        if issue:
            title = str(issue.get("title") or "")
            body = str(issue.get("body") or "")
            url = str(issue.get("url") or "")
    except Exception as exc:
        body = f"Unable to fetch Linear issue body headlessly: {exc}"

print(f"""You are authoring a BUILD_SPEC.md for the Recoil repository.

Create a complete, phase-oriented build spec for this Linear issue. Include enough context for a headless harness to implement and validate the work. Keep secrets out of the spec.

Run id: {run_id}
Issue: {issue_arg}
Title: {title}
URL: {url}

Issue body:
{body}
""")
PY
}

# Append a supervisor header (heading, blank line, run id, issue) to the
# worktree's build-log.md.
# Globals:   WORKTREE, RUN_ID, ISSUE_ID (read)
write_startup_marker() {
  printf '%s\n' \
    "## Phase Autonomy Supervisor" \
    "" \
    "run_id: $RUN_ID" \
    "issue: $ISSUE_ID" >> "$WORKTREE/build-log.md"
}

# Test whether a path is a directory root itself or lies beneath it.
# Arguments: $1 - path to test
#            $2 - root directory; trailing slashes are ignored, so "/a/b/" and
#                 "/a/b" are equivalent and "/" contains every absolute path.
#                 An empty root is treated like the filesystem root.
# Returns:   0 when the path equals the root or starts with "<root>/",
#            non-zero otherwise. The comparison is purely textual; neither
#            argument is resolved or required to exist.
path_is_under() {
  local path="$1"
  local root="$2"
  # Normalise the root: with a trailing slash the "<root>/*" pattern below
  # would become "<root>//*" and never match a real descendant.
  while [[ "$root" == */ ]]; do
    root="${root%/}"
  done
  # The root is quoted inside [[ ]] so glob characters in it match literally.
  [ "$path" = "$root" ] || [[ "$path" == "$root"/* ]]
}

# Resolve the requested worktree to an absolute path.
WORKTREE="$(resolve_path "$WORKTREE_ARG")"

# Build constants, filled from the five lines printed by load_build_constants
# (worktrees root, canonical root, lease TTL, lease heartbeat, wallclock limit).
WORKTREES_ROOT=""
CANONICAL_ROOT=""
BUILD_LEASE_TTL=""
BUILD_LEASE_HEARTBEAT=""
BUILD_WALLCLOCK_SECONDS=""
constant_index=0
while IFS= read -r constant_value; do
  constant_index=$((constant_index + 1))
  case "$constant_index" in
    1) WORKTREES_ROOT="$constant_value" ;;
    2) CANONICAL_ROOT="$constant_value" ;;
    3) BUILD_LEASE_TTL="$constant_value" ;;
    4) BUILD_LEASE_HEARTBEAT="$constant_value" ;;
    5) BUILD_WALLCLOCK_SECONDS="$constant_value" ;;
  esac
done < <(load_build_constants)

# Exactly five lines are expected; anything else (including a failed loader,
# which prints fewer) is treated as a configuration error.
if [ "$constant_index" -ne 5 ]; then
  printf '%s\n' "autonomy supervisor could not load build constants" >&2
  exit 2
fi

# Canonical guard: only run in a worktree that is inside the worktrees root
# and not inside the canonical root.
if path_is_under "$WORKTREE" "$WORKTREES_ROOT" && ! path_is_under "$WORKTREE" "$CANONICAL_ROOT"; then
  :
else
  emit_build_killed "canonical_guard"
  printf '%s\n' "autonomy supervisor refused unsafe worktree: $WORKTREE" >&2
  exit 1
fi

# PIDs of the background helpers (empty until started) and of this shell.
HEARTBEAT_PID=""
WATCHDOG_PID=""
SUPERVISOR_PID="$$"

# Release the build lease held for this run via the project's lease CLI.
# Globals:   RUN_ID (read)
# Returns:   always 0; output is discarded and a failed release is ignored.
release_lease() {
  if ! python3 -m recoil.pipeline.tools.autonomy.lease release --run-id "$RUN_ID" >/dev/null 2>&1; then
    return 0
  fi
}

# Send a signal to a process and, first, to all of its descendants.
# Arguments: $1 - PID at the root of the tree
#            $2 - signal name without the SIG prefix (default: TERM)
# Children are found with `pgrep -P` and signalled recursively before their
# parent. The shell executing this function is skipped while descending so it
# can finish the walk. Failures of pgrep and kill are ignored.
# Returns:   always 0.
kill_tree_signal() {
  local target_pid="$1"
  local sig="${2:-TERM}"
  local self_pid="${BASHPID:-}"
  local descendant

  # When BASHPID is not available, ask a child `sh` for its parent PID, which
  # is the shell running this function.
  [ -n "$self_pid" ] || self_pid="$(sh -c 'echo "$PPID"')"

  while IFS= read -r descendant; do
    if [ -z "$descendant" ] || [ "$descendant" = "$self_pid" ]; then
      continue
    fi
    kill_tree_signal "$descendant" "$sig"
  done < <(pgrep -P "$target_pid" 2>/dev/null || true)

  kill "-$sig" "$target_pid" 2>/dev/null || true
}

# Terminate a process tree: send TERM to the whole tree, wait two seconds,
# then send KILL to whatever is left.
# Arguments: $1 - PID at the root of the tree
kill_tree() {
  local target="$1"
  local sig
  for sig in TERM KILL; do
    # Grace period between the polite and the forced pass.
    [ "$sig" = "TERM" ] || sleep 2
    kill_tree_signal "$target" "$sig"
  done
}

# Generate BUILD_SPEC.md by running `claude -p` under a timeout.
# Globals:   WORKTREE, SPEC_GEN_TIMEOUT_SECONDS (read)
# Arguments: $1 - prompt text
# Files:     $WORKTREE/BUILD_SPEC.md  receives claude's stdout
#            $WORKTREE/spec-gen.err   receives claude's stderr, redacted
# Returns:   0   a non-empty spec was written
#            124 the timeout fired and the claude process tree was killed
#            1   claude succeeded but produced an empty spec
#            otherwise claude's own non-zero exit status
run_spec_generation() {
  local prompt="$1"
  local spec_path="$WORKTREE/BUILD_SPEC.md"
  local err_path="$WORKTREE/spec-gen.err"
  local tmp_err="$err_path.tmp"
  local timed_out="$WORKTREE/spec-gen.timeout"
  local claude_pid=""
  local timer_pid=""
  local status=0

  # Start from a clean slate so stale artifacts cannot be mistaken for output.
  rm -f "$spec_path" "$err_path" "$tmp_err" "$timed_out"
  (
    claude -p "$prompt" --permission-mode bypassPermissions < /dev/null > "$spec_path" 2> "$tmp_err"
  ) &
  claude_pid="$!"
  (
    # Timer: if claude is still alive when the timeout elapses, leave a marker
    # file (so the parent can tell a timeout from a failure) and kill its tree.
    sleep "$SPEC_GEN_TIMEOUT_SECONDS"
    if kill -0 "$claude_pid" >/dev/null 2>&1; then
      : > "$timed_out"
      kill_tree "$claude_pid"
    fi
  ) &
  timer_pid="$!"

  # `wait` returns claude's status; errexit is suspended so a failure can be
  # inspected instead of aborting the script.
  set +e
  wait "$claude_pid"
  status=$?
  set -e
  # Stop the timer together with its `sleep` child. Killing only the subshell
  # would orphan the sleep for the rest of the timeout, and the orphan keeps
  # this script's inherited stdout/stderr open.
  kill_tree_signal "$timer_pid" TERM
  wait "$timer_pid" 2>/dev/null || true
  # Publish stderr under its final name (or an empty file if none exists) and
  # scrub secrets from it.
  mv "$tmp_err" "$err_path" 2>/dev/null || : > "$err_path"
  redact_file "$err_path"

  if [ -f "$timed_out" ]; then
    rm -f "$timed_out"
    return 124
  fi
  if [ "$status" -ne 0 ]; then
    return "$status"
  fi
  [ -s "$spec_path" ] || return 1
  return 0
}

# Best-effort: commit everything in the worktree and push it to
# autonomy/wip/$RUN_ID on origin.
# Globals:   WORKTREE, RUN_ID (read)
# Returns:   always 0; nothing happens outside a git worktree, and failures of
#            the individual git commands are ignored.
push_wip_branch() {
  local artifact
  git -C "$WORKTREE" rev-parse --is-inside-work-tree >/dev/null 2>&1 || return 0
  # `git add -A` below picks up the supervisor's own log artifacts. Scrub them
  # first: on the wallclock path nothing has redacted build-log.md yet, and a
  # spec generation in progress leaves an unredacted spec-gen.err.tmp behind.
  # If scrubbing fails, skip the push rather than publish unredacted logs.
  for artifact in build-log.md spec-gen.err spec-gen.err.tmp; do
    redact_file "$WORKTREE/$artifact" || return 0
  done
  git -C "$WORKTREE" add -A >/dev/null 2>&1 || true
  git -C "$WORKTREE" commit -q -m "WIP: autonomy wallclock $RUN_ID" >/dev/null 2>&1 || true
  git -C "$WORKTREE" push --quiet origin "HEAD:refs/heads/autonomy/wip/$RUN_ID" >/dev/null 2>&1 || true
}

# Exit/signal handler: stop the background helpers, release the build lease
# and exit with the status that was current when the handler was entered.
# Globals:   HEARTBEAT_PID, WATCHDOG_PID (read)
cleanup() {
  local status=$?
  local helper_pid
  # Disarm the traps so the `exit` below does not re-enter this handler.
  trap - EXIT INT TERM
  # Signal each helper's whole process tree. The helpers are subshells that
  # spend their time in a `sleep` child; killing only the subshell would
  # orphan that sleep, which then keeps this script's inherited stdout/stderr
  # open for as long as it runs.
  for helper_pid in "$HEARTBEAT_PID" "$WATCHDOG_PID"; do
    [ -n "$helper_pid" ] || continue
    kill_tree_signal "$helper_pid" TERM
  done
  # Reap the helpers; errors (for example, already reaped) are ignored.
  for helper_pid in "$HEARTBEAT_PID" "$WATCHDOG_PID"; do
    [ -n "$helper_pid" ] || continue
    wait "$helper_pid" 2>/dev/null || true
  done
  release_lease
  exit "$status"
}

# Start a background loop that renews the build lease.
# Globals:   BUILD_LEASE_HEARTBEAT, BUILD_LEASE_TTL, RUN_ID, SUPERVISOR_PID
#            (read); HEARTBEAT_PID (written with the loop's PID)
# Every BUILD_LEASE_HEARTBEAT seconds the lease CLI's `heartbeat` command is
# run. When it fails, a "build_killed" event with reason "lease_lost" is
# emitted and the supervisor's process tree is killed.
start_heartbeat_loop() {
  (
    while :; do
      sleep "$BUILD_LEASE_HEARTBEAT"
      if ! python3 -m recoil.pipeline.tools.autonomy.lease heartbeat --run-id "$RUN_ID" --ttl "$BUILD_LEASE_TTL"; then
        # Emitting the event is telemetry only. The subshell inherits
        # `set -e`, so an unguarded failure here would end the loop before
        # the kill below and leave the build running without a lease.
        emit_build_killed "lease_lost" || true
        kill_tree "$SUPERVISOR_PID"
        exit 1
      fi
    done
  ) &
  HEARTBEAT_PID="$!"
}

# Start a background watchdog that enforces the wallclock limit.
# Globals:   BUILD_WALLCLOCK_SECONDS, SUPERVISOR_PID (read);
#            WATCHDOG_PID (written with the watchdog's PID)
# After BUILD_WALLCLOCK_SECONDS the watchdog emits a "build_killed" event with
# reason "wallclock", pushes a WIP branch, releases the lease and kills the
# supervisor's process tree.
start_wallclock_watchdog() {
  (
    sleep "$BUILD_WALLCLOCK_SECONDS"
    # Emitting the event is telemetry only. The subshell inherits `set -e`,
    # so an unguarded failure here would end the watchdog before it enforces
    # the limit.
    emit_build_killed "wallclock" || true
    push_wip_branch
    release_lease
    kill_tree "$SUPERVISOR_PID"
  ) &
  WATCHDOG_PID="$!"
}

# Check a log file for a rate-limit error and record it when found.
# Globals:   RUN_ID, ISSUE_ID (read)
# Arguments: $1 - path of the log file to inspect
# Returns:   0 when the project's `resource_gate` classifies the log text as a
#            rate-limit error; in that case the breaker is tripped with the
#            first 500 characters of the log and a "rate_limited" event is
#            emitted. 1 when the file is missing or no rate limit is detected.
scan_rate_limit_log() {
  local log_path="$1"
  python3 - "$RUN_ID" "$ISSUE_ID" "$log_path" <<'PY'
import os
import pathlib
import sys

from recoil.pipeline.tools.autonomy import constants, events, resource_gate

run_id, issue_id, log_path = sys.argv[1:4]

try:
    log_text = pathlib.Path(log_path).read_text(encoding="utf-8", errors="replace")
except FileNotFoundError:
    sys.exit(1)

if not resource_gate.is_rate_limit_error(log_text):
    sys.exit(1)

night_id = constants.current_night_id()
resource_gate.trip_breaker(night_id, log_text[:500])

ledger_override = os.environ.get("AUTONOMY_EVENTS_LEDGER")
events.emit(
    "rate_limited",
    run_id=run_id,
    issue_id=issue_id,
    night_id=night_id,
    ledger=pathlib.Path(ledger_override) if ledger_override else None,
    reason="build_log",
)
sys.exit(0)
PY
}

# Print the URL of the pull request for the worktree's current branch.
# Globals:   WORKTREE (read)
# Outputs:   the PR URL on stdout, or nothing when `gh` finds no PR or fails.
# Returns:   1 when the current branch cannot be determined (for example a
#            detached HEAD), otherwise 0.
lookup_pr_url() {
  local branch=""
  branch="$(git -C "$WORKTREE" branch --show-current 2>/dev/null || true)"
  [ -n "$branch" ] || return 1
  # `gh pr view` takes the branch as a positional argument; it has no --head
  # flag (that belongs to `gh pr list` / `gh pr create`). Passing --head made
  # gh reject the command, and because errors are suppressed here the lookup
  # silently returned nothing.
  gh pr view "$branch" --json url -q .url 2>/dev/null || true
}

# From here on, always stop the helpers and release the lease on the way out.
trap cleanup EXIT INT TERM

# The worktree must exist and be a git worktree. `cd` is part of the condition
# so that a missing directory takes the same path as a non-git directory
# instead of aborting under `set -e` before the claim can be released.
if ! cd "$WORKTREE" || ! git -C "$WORKTREE" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  if active_claim_exists; then
    emit_build_killed "invalid_worktree"
    release_claim "failed" "invalid_worktree"
    exit 1
  fi
  # No active claim for this run: nothing to do.
  exit 0
fi

write_startup_marker
start_heartbeat_loop
start_wallclock_watchdog

# Shadow tier0: report what would have been dispatched and stop.
if [ "$SHADOW" = "tier0" ]; then
  emit_event "shadow_would_dispatch" "" ""
  exit 0
fi

# Generate the build spec; a failure here needs a human.
SPEC_PROMPT="$(build_spec_prompt)"
if ! run_spec_generation "$SPEC_PROMPT"; then
  emit_event "spec_review_failed" "spec_gen" ""
  project_status "spec generation failed - human needed"
  release_claim "failed" "spec_gen"
  exit 1
fi

# Shadow tier1: the spec was generated, but the harness is not run.
if [ "$SHADOW" = "tier1" ]; then
  emit_event "shadow_would_dispatch" "" ""
  exit 0
fi

# Run the build harness (AUTONOMY_HARNESS overrides the in-tree script).
# errexit is suspended so the harness status can be inspected.
HARNESS_BIN="${AUTONOMY_HARNESS:-$WORKTREE/recoil/pipeline/tools/harness_orchestrator.sh}"
BUILD_LOG="$WORKTREE/build-log.md"
set +e
"$HARNESS_BIN" --coder codex --dir "$WORKTREE" --log "$BUILD_LOG" "$WORKTREE/BUILD_SPEC.md"
HARNESS_STATUS=$?
set -e

# Scrub the build log, then check it for rate-limit errors.
RATE_LIMITED=0
redact_file "$BUILD_LOG"
if scan_rate_limit_log "$BUILD_LOG"; then
  RATE_LIMITED=1
fi

if [ "$HARNESS_STATUS" -ne 0 ]; then
  if [ "$RATE_LIMITED" -eq 1 ]; then
    emit_event "build_capped" "rate_limited" ""
    release_claim "failed" "rate_limited"
  else
    emit_build_killed "harness_failed"
    release_claim "failed" "harness_failed"
  fi
  push_wip_branch
  exit "$HARNESS_STATUS"
fi

# lookup_pr_url returns non-zero when the branch cannot be determined. Without
# the guard, `set -e` would abort on this assignment and skip the "pr_lookup"
# failure handling below, leaving the claim unreleased.
PR_URL="$(lookup_pr_url || true)"
if [ -n "$PR_URL" ]; then
  emit_event "pr_opened" "" "$PR_URL"
  project_status "PR opened - human merge required"
  release_claim "completed" ""
  exit 0
fi

# The harness succeeded but no PR could be found.
emit_event "spec_review_failed" "pr_lookup" ""
project_status "build done but PR lookup failed - human needed"
release_claim "failed" "pr_lookup"
exit 1
