#!/usr/bin/env bash
# Orchestrator-bound heartbeat watcher (SSOT for the bound-liveness loop).
#
# Writes the attempt heartbeat once SYNCHRONOUSLY, then self-daemonizes a refresh
# loop that re-touches it every WATCH_INTERVAL_S until the orchestrator PID dies
# OR its start-time stops matching (CRITICAL-3: the watcher MUST die with the
# orchestrator — a watcher that outlives a kill -9 keeps the heartbeat fresh and
# the zombie is never detected). Returns immediately after the initial write, so
# a caller can proceed to `transition ATTEMPT_RUNNING` without racing ahead of
# first liveness. Shared by /dispatch (attempt 1) and dispatch_reaper.sh
# (re-dispatch) so the liveness contract lives in exactly one place.
#
# Usage (call in the FOREGROUND — it backgrounds its own loop):
#   dispatch_heartbeat_watch.sh <run_dir> <attempt> <pid> <start_time> <session> <log_path>
set -uo pipefail

# Positional arguments (see the usage line in the file header).
RUN_DIR="$1"     # run directory; status.json is read from here
ATTEMPT="$2"     # attempt number; selects the attempt-NNN directory
PID="$3"         # orchestrator PID the refresh loop is bound to
START_TIME="$4"  # expected `ps -o lstart=` value for PID (guards PID reuse)
SESSION="$5"     # recorded in the heartbeat as tmux_session
LOG_PATH="$6"    # recorded in the heartbeat as log_path
# Seconds between heartbeat refreshes; overridable through the environment.
WATCH_INTERVAL_S="${WATCH_INTERVAL_S:-60}"

# Print the attempt directory for a run: <run_dir>/attempt-NNN.
# Arguments: $1 - run directory, $2 - attempt number
# Outputs:   the path on stdout (no trailing newline)
attempt_dir_path() {
  local run_dir=$1
  local attempt=$2
  # printf's %d reads a leading 0 as octal: "010" would become 008, and
  # "08"/"009" are rejected and rendered as 000. Normalise all-digit input to
  # base 10 first so zero-padded attempt numbers map to the right directory.
  # Anything else is passed through unchanged.
  if [[ "$attempt" =~ ^[0-9]+$ ]]; then
    attempt=$((10#$attempt))
  fi
  printf '%s/attempt-%03d' "$run_dir" "$attempt"
}

ATTEMPT_DIR="$(attempt_dir_path "$RUN_DIR" "$ATTEMPT")"
HEARTBEAT="$ATTEMPT_DIR/heartbeat.json"

# (Re)write the attempt heartbeat file via a temp file + rename.
#
# Globals (read): ATTEMPT_DIR, HEARTBEAT, RUN_DIR, ATTEMPT, PID, START_TIME,
#                 SESSION, LOG_PATH
# Outputs:        $HEARTBEAT - pretty-printed JSON with sorted keys
# Returns:        the exit status of the python3 writer; it is non-zero when
#                 status.json cannot be read or parsed, ATTEMPT/PID are not
#                 integers, or the file cannot be written. The mkdir status
#                 is not checked separately.
write_heartbeat() {
  mkdir -p "$ATTEMPT_DIR"
  python3 - \
    "$HEARTBEAT" \
    "$RUN_DIR/status.json" \
    "$ATTEMPT" \
    "$PID" \
    "$START_TIME" \
    "$SESSION" \
    "$LOG_PATH" <<'PY'
import datetime as dt
import json
import os
import sys
from pathlib import Path

heartbeat_path, status_path, attempt, pid, start_time, session, log_path = sys.argv[1:8]

# run_id is copied from the run's status.json (None when the key is absent).
with open(status_path, "r", encoding="utf-8") as status_file:
    run_id = json.load(status_file).get("run_id")

# UTC timestamp at whole-second precision, spelled with a "Z" suffix.
now_utc = dt.datetime.now(dt.timezone.utc).replace(microsecond=0)
stamp = now_utc.isoformat().replace("+00:00", "Z")

record = {
    "run_id": run_id,
    "attempt": int(attempt),
    "orchestrator_pid": int(pid),
    "orchestrator_start_time": start_time,
    "tmux_session": session,
    "ps_match": "harness_orchestrator.sh",
    "log_path": log_path,
    "phase_hint": None,
    "round_hint": None,
    "updated_at": stamp,
}

# Write a per-process temp file next to the target, then rename it over the
# target so the heartbeat is swapped in with a single replace.
target = Path(heartbeat_path)
scratch = target.with_name(f".{target.name}.{os.getpid()}.tmp")
scratch.write_text(json.dumps(record, indent=2, sort_keys=True) + "\n", encoding="utf-8")
scratch.replace(target)
PY
}

# Synchronous initial heartbeat. Callers proceed as soon as this script
# returns, so a failed first write must be visible to them: exit non-zero
# instead of falling through to the backgrounded loop, whose `&` would make
# the script report success regardless. No watcher is started in that case.
write_heartbeat || exit 1

# Self-daemonized bound refresh loop (orphaned to launchd/init when we return).
# Each iteration re-checks that PID is still alive AND still has the expected
# start time (guards against PID reuse) before refreshing the heartbeat; the
# loop ends as soon as either check fails or a refresh cannot be written.
(
  while kill -0 "$PID" 2>/dev/null; do
    # lstart output is trimmed so it compares cleanly against START_TIME.
    current_start="$(ps -p "$PID" -o lstart= 2>/dev/null | sed 's/^ *//;s/ *$//')"
    [ "$current_start" = "$START_TIME" ] || break
    write_heartbeat || break
    # If sleep rejects WATCH_INTERVAL_S (e.g. a non-numeric override) it fails
    # immediately; fall back to a 60 s pause so the loop cannot busy-spin
    # ps/python3 while still keeping the heartbeat fresh.
    sleep "$WATCH_INTERVAL_S" || sleep 60
  done
) >/dev/null 2>&1 &
