#!/usr/bin/env bash
# harness_orchestrator.sh v2
# External bash orchestrator for multi-phase BUILD_SPEC execution.
# Replaces the Claude-internal harness skill with a crash-proof,
# resumable loop that sends each phase as a fresh `claude -p` call.
#
# Usage:
#   harness_orchestrator.sh <path/to/BUILD_SPEC.md>
#   harness_orchestrator.sh --dry-run [--coder codex|claude] [--dir <path>] [--log <path>] [--no-codex-spec-review] <path/to/BUILD_SPEC.md>
#
# Compatible with macOS bash 3.2 and homebrew bash 5.x.
# No jq, no GNU-only extensions.

set -u  # Treat unset variables as errors

# ── Self-snapshot ─────────────────────────────────────────────────
# On first entry (HARNESS_ORCHESTRATOR_SNAPSHOT unset) copy this script to a
# private temp file and re-exec bash on the copy, so the running program is not
# the mutable file on disk. The re-exec'd copy sees the marker and skips this.
if [ -z "${HARNESS_ORCHESTRATOR_SNAPSHOT:-}" ]; then
    __orig_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    # mktemp creates the file atomically with an unpredictable name, so a
    # pre-planted file or symlink in a shared temp dir cannot capture the code
    # we are about to execute. The template ends in X's only (no suffix) to
    # stay portable across BSD and GNU mktemp.
    __snap=""
    if __snap="$(mktemp "${TMPDIR:-/tmp}/harness_orchestrator.XXXXXX" 2>/dev/null)" \
       && [ -n "$__snap" ] \
       && cp "${BASH_SOURCE[0]}" "$__snap" 2>/dev/null; then
        export HARNESS_ORCHESTRATOR_SNAPSHOT=1 HARNESS_ORIGINAL_SCRIPT_DIR="$__orig_dir" HARNESS_SNAPSHOT_PATH="$__snap"
        exec bash "$__snap" "$@"
        # Reached only if exec returns instead of replacing the process.
        echo "WARNING: harness self-snapshot exec failed; continuing from mutable script." >&2
    else
        # Do not leave an empty temp file behind when the copy step failed.
        [ -n "$__snap" ] && rm -f "$__snap" 2>/dev/null
        echo "WARNING: harness self-snapshot copy failed; continuing from mutable script." >&2
    fi
fi

# Running from (or after creating) a snapshot: remove the temp copy on exit.
if [ -n "${HARNESS_SNAPSHOT_PATH:-}" ]; then
    trap 'rm -f "${HARNESS_SNAPSHOT_PATH:-}" 2>/dev/null || true' EXIT
fi

# Print the regex declared by the single `ALLOWED='<regex>'` line in a spec.
# Arguments: $1 - spec file to scan
# Outputs:   the text between the quotes, on stdout
# Returns:   non-zero unless exactly one non-comment ALLOWED line is present
autoland_extract_allowed_regex() {
    local spec_file="$1"
    # The single quote is handed to awk as a variable so the program text does
    # not need shell quote-splicing.
    awk -v sq="'" '
        BEGIN {
            open_re  = "^[[:space:]]*ALLOWED=" sq
            close_re = sq "[[:space:]]*$"
            hits = 0
        }
        # Lines starting with "#" (after optional whitespace) never count.
        /^[[:space:]]*#/ { next }
        $0 ~ (open_re "(.*)" close_re) {
            hits++
            value = $0
            sub(open_re, "", value)
            sub(close_re, "", value)
        }
        END {
            # Zero declarations and multiple declarations are both rejected.
            if (hits != 1) exit 1
            print value
        }
    ' "$spec_file"
}

# Check that every changed path matches the spec's ALLOWED regex.
# Arguments: $1  - spec file (its ALLOWED='...' line supplies the ERE)
#            $2… - changed paths; when none are given, paths are read from
#                  stdin, one per line (blank lines are skipped)
# Returns:   0 every path matches
#            1 at least one path does not match
#            2 no usable allowlist (missing spec argument, zero or several
#              ALLOWED lines, or an empty ALLOWED='')
autoland_diff_within_allowlist() {
    local spec_file="${1:-}" allowed path
    shift || return 2
    allowed="$(autoland_extract_allowed_regex "$spec_file" 2>/dev/null)" || return 2
    [ -z "$allowed" ] && return 2   # empty ALLOWED='' must NOT fail-open (treat as no-allowlist)
    if [ "$#" -eq 0 ]; then
        # `|| [ -n "$path" ]` keeps a final line that has no trailing newline;
        # without it that last path would skip the check entirely.
        while IFS= read -r path || [ -n "$path" ]; do
            [ -z "$path" ] && continue
            # -e forces the regex to be a pattern even when it starts with "-".
            if ! printf '%s\n' "$path" | grep -Eq -e "$allowed"; then
                return 1
            fi
        done
        return 0
    fi
    for path in "$@"; do
        if ! printf '%s\n' "$path" | grep -Eq -e "$allowed"; then
            return 1
        fi
    done
    return 0
}

# Internal sub-command: `<script> __autoland_allowlist_check <spec> [path ...]`
# runs only the allowlist check and exits with its status; exit 64 when the
# spec argument is missing. Any other first argument falls through untouched.
case "${1:-}" in
    __autoland_allowlist_check)
        shift
        if [ "$#" -eq 0 ]; then
            printf '%s\n' "Usage: $0 __autoland_allowlist_check <spec-file> [changed-path ...]" >&2
            exit 64
        fi
        autoland_diff_within_allowlist "$@"
        exit "$?"
        ;;
esac

# ── Argument parsing ──────────────────────────────────────────────

# Defaults for everything the argument loop below may override.
# DRY_RUN and SKIP_SPEC_REVIEW hold the command names `true`/`false`; later code
# tests them by executing the value (`if $DRY_RUN; ...`).
DRY_RUN=false
# Engine used when a phase does not name one; --coder accepts codex or claude.
DEFAULT_CODER="codex"
# Values of --dir and --log; empty means "derive from the spec location".
WORKING_DIR_OVERRIDE=""
BUILD_LOG_OVERRIDE=""
SKIP_SPEC_REVIEW=false
# The single positional argument (path to the BUILD_SPEC).
SPEC_FILE=""
# Directory of the ORIGINAL script: the self-snapshot step exports
# HARNESS_ORIGINAL_SCRIPT_DIR before re-exec'ing a temp copy; without it the
# directory of the currently running file is used.
SCRIPT_DIR="${HARNESS_ORIGINAL_SCRIPT_DIR:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}"

# Write the one-line command synopsis to stderr. Does not exit; callers decide.
usage() {
    printf '%s\n' "Usage: $0 [--dry-run] [--coder codex|claude] [--dir <path>] [--log <path>] [--no-codex-spec-review] <path/to/BUILD_SPEC.md>" >&2
}

# Abort with "Error: <message>" + usage unless the current flag is followed by
# a value. $1 = number of remaining arguments (flag included), $2 = message.
_harness_require_flag_value() {
    [ "$1" -ge 2 ] && return 0
    echo "Error: $2" >&2
    usage
    exit 1
}

# Abort on an argument the parser does not accept. $1 = the offending argument.
_harness_unexpected_argument() {
    echo "Error: unexpected argument '$1'" >&2
    usage
    exit 1
}

# Consume the command line left to right. Exactly one positional argument (the
# spec path) is accepted; a second positional or an unknown --flag is an error.
while [ $# -gt 0 ]; do
    arg="$1"
    case "$arg" in
        --dry-run)
            DRY_RUN=true
            shift
            ;;
        --no-codex-spec-review)
            SKIP_SPEC_REVIEW=true
            shift
            ;;
        --coder)
            _harness_require_flag_value "$#" "--coder requires codex or claude"
            DEFAULT_CODER="$2"
            if [ "$DEFAULT_CODER" != "codex" ] && [ "$DEFAULT_CODER" != "claude" ]; then
                echo "Error: --coder must be codex or claude" >&2
                usage
                exit 1
            fi
            shift 2
            ;;
        --dir)
            _harness_require_flag_value "$#" "--dir requires a path"
            WORKING_DIR_OVERRIDE="$2"
            shift 2
            ;;
        --log)
            _harness_require_flag_value "$#" "--log requires a path"
            BUILD_LOG_OVERRIDE="$2"
            shift 2
            ;;
        # Legacy /harness flags that dispatch (.claude/skills/dispatch/SKILL.md)
        # appends by default, e.g. --debug. They are not implemented here but
        # must be accepted so the unattended dispatch path is not rejected.
        # Boolean form: drop the flag.
        --debug|--no-lint|--no-commit|--no-code-review|--no-simplify)
            shift
            ;;
        # Value-taking form: drop the flag and, when present, its argument.
        --debug-max|--max-retries|--phases|--validate|--project)
            shift
            if [ $# -gt 0 ]; then
                shift
            fi
            ;;
        --*)
            _harness_unexpected_argument "$arg"
            ;;
        *)
            # First positional is the spec; any further positional is rejected.
            [ -z "$SPEC_FILE" ] || _harness_unexpected_argument "$arg"
            SPEC_FILE="$arg"
            shift
            ;;
    esac
done

# Refuse to continue without a usable spec: either no positional argument was
# given or the path does not name a regular file. Only the usage text is shown.
if [ -z "$SPEC_FILE" ] || [ ! -f "$SPEC_FILE" ]; then
    usage
    exit 1
fi

# ── Resolve paths ─────────────────────────────────────────────────
# MUST_FIX #3: WORKING_DIR = project root, not the spec folder.
# Build log lives next to the spec; temps live in /tmp.

# Make SPEC_FILE absolute: cd into its directory, take `pwd`, re-append the
# file name. This must happen before the script changes directory.
SPEC_FILE="$(cd "$(dirname "$SPEC_FILE")" && pwd)/$(basename "$SPEC_FILE")"
SPEC_DIR="$(dirname "$SPEC_FILE")"
# Top level of the git work tree containing the spec. git's stderr is
# discarded, so REPO_ROOT is empty when git reports no top level.
REPO_ROOT="$(cd "$SPEC_DIR" && git rev-parse --show-toplevel 2>/dev/null)"

# Resolve the SUBPROJECT root the spec actually lives in. Existing specs under a
# subproject (e.g. recoil/pipeline/BUILD_SPEC.md) author dispatch targets and
# validation `cd`s relative to that subproject root (`execution/...`,
# `pipeline/...`, `cd .../recoil`), NOT the monorepo git root. Dispatching agents
# from the git root would resolve those relative targets at the wrong level.
# Walk up from the spec dir to the NEAREST ancestor (at or below the git root)
# containing a pyproject.toml — the canonical Python project boundary — and use
# that. Falls back to the git root, then to $HOME/CLAUDE_PROJECTS. An explicit
# --dir override still wins.
# Print the nearest directory, walking up from $SPEC_DIR, that contains a
# pyproject.toml. The walk never goes above $REPO_ROOT (or "/" when REPO_ROOT
# is empty).
# Globals: SPEC_DIR (read), REPO_ROOT (read)
# Outputs: the matching directory on stdout
# Returns: 0 when found, 1 when the boundary is reached without a match
resolve_project_root() {
    local dir="$SPEC_DIR" stop="${REPO_ROOT:-/}" parent
    while :; do
        if [ -f "$dir/pyproject.toml" ]; then
            printf '%s\n' "$dir"
            return 0
        fi
        # Stop at the boundary. SPEC_DIR comes from a logical `pwd` while
        # REPO_ROOT comes from git, so the same directory can be spelled two
        # ways when a symlink is involved; -ef compares the directories
        # themselves so the walk cannot slip past the git root.
        if [ "$dir" = "$stop" ] || [ "$dir" -ef "$stop" ]; then
            break
        fi
        [ "$dir" = "/" ] && break
        parent="$(dirname "$dir")"
        # dirname has a fixed point ("/" or "."); never spin on it.
        [ "$parent" = "$dir" ] && break
        dir="$parent"
    done
    return 1
}

# Pick the working directory. Precedence: explicit --dir, then the nearest
# pyproject.toml directory found by resolve_project_root, then the git root,
# then $HOME/CLAUDE_PROJECTS.
if [ -n "$WORKING_DIR_OVERRIDE" ]; then
    WORKING_DIR="$WORKING_DIR_OVERRIDE"
else
    WORKING_DIR="$(resolve_project_root)" || WORKING_DIR="${REPO_ROOT:-$HOME/CLAUDE_PROJECTS}"
fi
# Canonicalize to an ABSOLUTE path BEFORE we cd. A relative --dir would leave
# WORKING_DIR relative, so the later `git -C "$WORKING_DIR"` / `codex -C
# "$WORKING_DIR"` calls would re-resolve it against the NEW cwd (e.g. `--dir
# recoil` becoming .../recoil/recoil) and run in the wrong directory.
# The pre-canonicalization value is kept only for the error message below.
WORKING_DIR_UNRESOLVED="$WORKING_DIR"
WORKING_DIR="$(cd "$WORKING_DIR" 2>/dev/null && pwd)" || {
    echo "FATAL: working directory not found: $WORKING_DIR_UNRESOLVED" >&2
    exit 1
}
# From here on the script runs with the working directory as its cwd.
cd "$WORKING_DIR" || exit 1

# The session is named after the directory that holds the spec.
SESSION_NAME="$(basename "$SPEC_DIR")"
# Key the default build log by the SPEC FILENAME too — multiple specs in one
# directory must NOT share a log (else a crashed run of spec A is "resumed" by
# spec B and reads stale phase statuses).
# An explicit --log path replaces the derived name entirely.
BUILD_LOG="${BUILD_LOG_OVERRIDE:-$SPEC_DIR/build-log-${SESSION_NAME}-$(basename "$SPEC_FILE" .md).md}"
# Per-process scratch paths, keyed by this shell's PID.
# NOTE(review): these names are predictable (PID only) inside a possibly shared
# temp dir; consider mktemp if this ever runs on a multi-user host — confirm
# how the rest of the file creates and cleans them up before changing.
TMP_OUT="${TMPDIR:-/tmp}/.harness_out.$$"
TMP_PROMPT="${TMPDIR:-/tmp}/.harness_prompt.$$"
VAL_SCRIPT="${TMPDIR:-/tmp}/.harness_val.$$.sh"

MAX_RETRIES=3
VALIDATION_TIMEOUT=300  # 5 minutes

# Dispatch-mode EXIT trap state. These defaults must exist before the trap is
# installed so `set -u` cannot make the trap abort on an early failure.
# Each one keeps a value inherited from the environment and otherwise falls
# back to the default written after `:-`.
RUN_DIR="${DISPATCH_RUN_DIR:-}"
ATTEMPT="${DISPATCH_ATTEMPT:-1}"
CONVERGE_STATUS="${CONVERGE_STATUS:-UNKNOWN}"
CURRENT_PHASE="${CURRENT_PHASE:-preflight}"
LAST_GATE="${LAST_GATE:-}"
LAST_VALIDATION_COMMAND="${LAST_VALIDATION_COMMAND:-}"
TERMINAL_CAUSE_HINT="${TERMINAL_CAUSE_HINT:-unknown}"
TERMINAL_FAILURE_REASON="${TERMINAL_FAILURE_REASON:-}"
PR_URL="${PR_URL:-}"

# ── NICE_TO_HAVE #16: Git identity pre-check ─────────────────────
# Only enforced for real builds (which commit). --dry-run is a pure
# parse/diagnostic pass that never commits, so it must not require a
# configured identity (fresh CI/Studio checkouts have none yet).

# Read the identity git would use from the current directory (the working
# directory at this point); a missing setting yields an empty string, not an
# error.
git_email="$(git config user.email 2>/dev/null || true)"
git_name="$(git config user.name 2>/dev/null || true)"
# Abort real builds when either half of the identity is missing. $DRY_RUN holds
# the command name `true` or `false`, so `! $DRY_RUN` means "not a dry run".
if ! $DRY_RUN && { [ -z "$git_email" ] || [ -z "$git_name" ]; }; then
    echo "FATAL: git identity not configured (user.email='$git_email', user.name='$git_name')." >&2
    echo "Set with: git config user.email '...'; git config user.name '...'" >&2
    exit 1
fi

# Startup banner on stdout: the resolved paths and identity for this run.
echo "Starting Harness Orchestrator v2 for: $SESSION_NAME"
echo "Working Directory: $WORKING_DIR"
echo "Spec File: $SPEC_FILE"
echo "Build Log: $BUILD_LOG"
echo "Git identity: $git_name <$git_email>"
echo ""

# ── HELPER FUNCTIONS ──────────────────────────────────────────────

# Extract content for a specific phase from the spec.
# Handles fractional IDs (1, 1.5, 1.6, etc.).
# Print one phase's section of the spec, heading line included.
# Globals:   SPEC_FILE (read)
# Arguments: $1 - phase ID, possibly fractional (1, 1.5, 1.6, ...)
# Outputs:   lines from "## Phase <id>" up to, but not including, the next
#            "---" rule or the next Phase/Pre-flight/Post-Build/Dependency/Risk
#            "## " heading
get_phase_content() {
    local phase="$1"
    # Make dots in the ID literal with a bracket expression (1.5 -> 1[.]5).
    # A backslash escape must NOT be used here: awk applies escape processing
    # to -v values, and gawk warns about "\." and reduces it to a bare ".",
    # which would let "1.5" also match e.g. "1x5".
    local phase_re
    phase_re="$(printf '%s' "$phase" | sed 's/\./[.]/g')"
    awk -v p="^## Phase ${phase_re}[: ]" -v stop="^## (Phase |Pre-flight|Post-Build|Dependency|Risk)" '
        $0 ~ p {flag=1; print; next}
        flag && /^---$/ {exit}
        flag && $0 ~ stop {exit}
        flag {print}
    ' "$SPEC_FILE"
}

# Print the spec's "## Pre-flight Extraction" section, heading included, up to
# (not including) the next "---" rule or the next "## " heading.
# Globals: SPEC_FILE (read)
get_preflight_extraction() {
    awk '
        /^## Pre-flight Extraction/ { inside = 1; print; next }
        !inside { next }
        /^---$/ || /^## / { exit }
        { print }
    ' "$SPEC_FILE"
}

# Extract validation command from a phase.
# NICE_TO_HAVE #14: Uses printf instead of echo to avoid mangling -e/-n.
# Print the body of the first ```bash fence that follows a line matching the
# heading regex in $1. Remaining arguments are input files; with none, awk
# reads stdin.
_harness_fenced_bash_after() {
    local heading_re="$1"
    shift
    awk -v heading="$heading_re" '
        $0 ~ heading { armed = 1; next }
        armed && /^```bash/ { capture = 1; next }
        capture && /^```/ { exit }
        capture { print }
    ' "$@"
}

# Print the validation command for a phase, without a trailing newline.
# Sources are tried in order and the first non-empty one wins:
#   1. a fenced bash block under the phase's "### Validation" heading
#   2. a single-line "### Validation: <cmd>" or "**Validation:** <cmd>"
#      (backticks removed)
#   3. the fenced bash block under the spec-wide "## Validation command"
# printf is used for output so a command such as "-n ..." is not mangled.
# Globals:   SPEC_FILE (read)
# Arguments: $1 - phase ID
get_validation_cmd() {
    local phase="$1"
    local body cmd
    body="$(get_phase_content "$phase")"

    cmd="$(printf '%s\n' "$body" | _harness_fenced_bash_after '^### Validation')"

    if [ -z "$cmd" ]; then
        # Existing specs use these inline forms; parsing them keeps a phase
        # with documented validation from passing without running its gate.
        cmd="$(printf '%s\n' "$body" | awk '
            /^### Validation:[[:space:]]*[^[:space:]]/ { sub(/^### Validation:[[:space:]]*/, ""); gsub(/`/, ""); print; exit }
            /^\*\*Validation:\*\*[[:space:]]*[^[:space:]]/ { sub(/^\*\*Validation:\*\*[[:space:]]*/, ""); gsub(/`/, ""); print; exit }
        ')"
    fi

    if [ -z "$cmd" ]; then
        cmd="$(_harness_fenced_bash_after '^## Validation command' "$SPEC_FILE")"
    fi

    printf '%s' "$cmd"
}

# Read the last status for a phase from the build log.
# Print the most recently logged status for a phase, without a trailing
# newline. Prints PENDING when the build log does not exist or holds no
# "- Status:" line under a "## Phase <id>:" heading.
# Globals:   BUILD_LOG (read)
# Arguments: $1 - phase ID, possibly fractional
get_phase_status() {
    local phase="$1"
    if [ ! -f "$BUILD_LOG" ]; then printf 'PENDING'; return; fi
    # Dots are made literal with [.] rather than "\.": awk applies escape
    # processing to -v values, and gawk warns about "\." and reduces it to a
    # bare ".", so "1.5" would also pick up statuses logged for e.g. "1x5".
    local phase_re
    phase_re="$(printf '%s' "$phase" | sed 's/\./[.]/g')"
    local status
    # Every "## Phase" heading switches in_phase on or off; each status line
    # seen while it is on is printed, and tail keeps only the last one.
    status="$(awk -v p="^## Phase ${phase_re}:" '
        /^## Phase/ { if ($0 ~ p) in_phase=1; else in_phase=0 }
        in_phase && /^- Status:/ { print $3 }
    ' "$BUILD_LOG" | tail -n 1)"
    printf '%s' "${status:-PENDING}"
}

# Extract the phase name from the spec header line.
# Print the title of a phase: its "## Phase <id>" heading line from the spec
# with the "## Phase <id>" prefix, separator and following whitespace removed.
# Globals:   SPEC_FILE (read)
# Arguments: $1 - phase ID, possibly fractional
get_phase_name() {
    local phase="$1"
    local phase_re heading_re
    # Dots in the ID must be literal in the grep pattern (1.5 -> 1\.5).
    phase_re="$(printf '%s' "$phase" | sed 's/\./\\./g')"
    heading_re="^## Phase ${phase_re}[: ]"
    grep -E -- "$heading_re" "$SPEC_FILE" \
        | sed -E 's/^## Phase [0-9]+(\.[0-9]+)?[a-z]?[: ][[:space:]]*//'
}

# Append a status entry for a phase to the build log and announce it on stdout.
# Globals:   BUILD_LOG (appended to)
# Arguments: $1 - phase ID, $2 - phase name, $3 - status word
log_status() {
    local phase="$1" phase_name="$2" status="$3"
    {
        printf '\n'
        printf '## Phase %s: %s\n' "$phase" "$phase_name"
        printf -- '- Status: %s\n' "$status"
    } >> "$BUILD_LOG"
    printf '%s\n' ">> Phase $phase marked as $status"
}

# Render a multi-line, human-readable failure report on stdout.
# Globals:   LAST_GATE, CURRENT_PHASE, LAST_VALIDATION_COMMAND (read)
# Arguments: $1 - gate/step name (empty: LAST_GATE, then CURRENT_PHASE, then "unknown")
#            $2 - exit code (empty: "unknown")
#            $3 - file holding captured output (may be empty or missing)
#            $4 - free-form context (line omitted when empty)
format_failure_reason() {
    local gate="$1" exit_code="$2" output_file="$3" context="$4"
    local excerpt=""

    gate="${gate:-${LAST_GATE:-${CURRENT_PHASE:-unknown}}}"
    exit_code="${exit_code:-unknown}"

    printf 'Gate/step: %s\nPhase: %s\nExit code: %s\n' \
        "$gate" "${CURRENT_PHASE:-unknown}" "$exit_code"
    [ -z "${LAST_VALIDATION_COMMAND:-}" ] || printf 'Validation command: %s\n' "$LAST_VALIDATION_COMMAND"
    [ -z "$context" ] || printf 'Context: %s\n' "$context"

    # Last 20 lines of the captured output, with blank lines dropped.
    if [ -n "$output_file" ] && [ -f "$output_file" ]; then
        excerpt="$(tail -n 20 "$output_file" 2>/dev/null | sed '/^[[:space:]]*$/d' || true)"
    fi

    if [ -z "$excerpt" ]; then
        printf 'Output tail: <no captured output>\n'
        return
    fi
    printf 'Output tail:\n%s\n' "$excerpt"
}

# Format a failure report and store it in the exported global
# TERMINAL_FAILURE_REASON.
# Globals:   TMP_OUT, LAST_GATE, CURRENT_PHASE (read);
#            TERMINAL_FAILURE_REASON (written, exported)
# Arguments: $1 - exit code
#            $2 - captured-output file (default: $TMP_OUT)
#            $3 - context text (default: empty)
#            $4 - gate/step (default: LAST_GATE, then CURRENT_PHASE, then "unknown")
record_failure_reason() {
    local rc="$1" capture="${2:-$TMP_OUT}" note="${3:-}"
    local step="${4:-${LAST_GATE:-${CURRENT_PHASE:-unknown}}}"
    TERMINAL_FAILURE_REASON="$(format_failure_reason "$step" "$rc" "$capture" "$note")"
    export TERMINAL_FAILURE_REASON
}

# Record the failure reason (see record_failure_reason) and append it to the
# build log as a fenced "text" block under a bold "Failure reason:" label.
# Globals:   TMP_OUT, LAST_GATE, CURRENT_PHASE (read); BUILD_LOG (appended to)
# Arguments: same four as record_failure_reason: exit code, output file,
#            context, gate/step — the last three optional
append_failure_reason_to_log() {
    local rc="$1" capture="${2:-$TMP_OUT}" note="${3:-}"
    local step="${4:-${LAST_GATE:-${CURRENT_PHASE:-unknown}}}"
    record_failure_reason "$rc" "$capture" "$note" "$step"
    printf '%s\n' \
        '**Failure reason:**' \
        '```text' \
        "$TERMINAL_FAILURE_REASON" \
        '```' >> "$BUILD_LOG"
}

# Emit git ':(exclude)' pathspecs for the harness's OWN artifacts (build log,
# per-phase transcripts, end-of-build review log, stale-log archives) so that
# no-op-dispatch detection ignores them. When the spec dir lives inside the
# working repo (the default), log_status + the redirected PHASE_LOG dirty the
# tree before the no-op check; without these excludes a FAILED dispatch that made
# NO code changes would look "changed" and skip the hard-fail gate (fail-OPEN).
# Each pathspec is repo-relative and only emitted when the artifact path is under
# $WORKING_DIR (outside-repo artifacts never appear in `git status` anyway).
# Print one git ':(exclude)' pathspec per harness artifact that lives under
# $WORKING_DIR: the build log, its ".stale-*" archives, the per-phase
# "phase-*-claude.log" transcripts and "end-of-build-review.log".
# Globals: BUILD_LOG, SPEC_DIR, WORKING_DIR (read)
# Outputs: pathspecs relative to $WORKING_DIR, one per line; artifacts outside
#          $WORKING_DIR produce no output
harness_artifact_excludes() {
    local p rel
    for p in \
        "$BUILD_LOG" \
        "$BUILD_LOG.stale-*" \
        "$SPEC_DIR/phase-*-claude.log" \
        "$SPEC_DIR/end-of-build-review.log"; do
        case "$p" in
            "$WORKING_DIR"/*)
                # Quote the prefix so it is removed as a literal string. Left
                # unquoted, glob characters in the directory name (e.g. "[1]")
                # are treated as a pattern, the prefix is not stripped, and an
                # absolute path is emitted instead of a relative one.
                rel=${p#"$WORKING_DIR"/}
                printf '%s\n' ":(exclude)${rel}"
                ;;
        esac
    done
}

# A fingerprint of the working tree's current diff (vs HEAD, vs index, and
# untracked files), excluding the harness's own artifacts. Compared before/after
# a dispatch so a no-op is detected even when the tree was ALREADY dirty: a bare
# "is the tree clean now" check would mis-read a pre-existing dirty tree as
# "engine made changes" and skip the failed-no-op-dispatch hard-fail (fail-OPEN).
# Print a hash over the working tree's current state: `git diff HEAD` output
# followed by `git status --porcelain` output, both limited to "." minus the
# harness's own artifacts, piped through `git hash-object --stdin`.
# Globals: WORKING_DIR (read)
tree_fingerprint() {
    local exclude
    # Start from "." and append one ':(exclude)' pathspec per artifact, so the
    # array is never empty when it is expanded (safe under `set -u`).
    local pathspec=(.)
    while IFS= read -r exclude; do
        [ -z "$exclude" ] || pathspec+=("$exclude")
    done < <(harness_artifact_excludes)
    {
        git -C "$WORKING_DIR" diff HEAD -- "${pathspec[@]}" 2>/dev/null
        git -C "$WORKING_DIR" status --porcelain -- "${pathspec[@]}" 2>/dev/null
    } | git hash-object --stdin 2>/dev/null
}

# Print the path of the codex executable to use, without a trailing newline
# for the first two sources.
# Lookup order: $HARNESS_CODEX_BIN (authoritative when set: returns 1 if it is
# not executable, with no fallback), the Codex.app bundle path, then `codex`
# on PATH.
# Returns: 0 with a path on stdout, non-zero when nothing is found
resolve_codex_binary() {
    local bundled=/Applications/Codex.app/Contents/Resources/codex

    if [ -n "${HARNESS_CODEX_BIN:-}" ]; then
        if [ -x "$HARNESS_CODEX_BIN" ]; then
            printf '%s' "$HARNESS_CODEX_BIN"
            return 0
        fi
        return 1
    fi

    if [ -x "$bundled" ]; then
        printf '%s' "$bundled"
        return 0
    fi

    command -v codex 2>/dev/null
}

# Send one prompt to the selected engine and return that engine's exit status.
# dispatch_budget_add_round is called first on every invocation, including
# ones that go on to fail.
# Globals:   WORKING_DIR (read)
# Arguments: $1 - prompt text, $2 - engine ("codex" or "claude")
# Returns:   the engine's exit status; 127 when engine=codex but no codex
#            binary resolves; 2 for any other engine name
dispatch_phase() {
    local PROMPT="$1"
    local engine="$2"
    dispatch_budget_add_round

    if [ "$engine" = "codex" ]; then
        local codex_bin
        codex_bin="$(resolve_codex_binary)" || {
            echo "FATAL: engine=codex but no codex binary found" >&2
            return 127
        }
        # codex exec dispatch path; stdin is closed off with /dev/null.
        local codex_args=(
            exec
            --skip-git-repo-check
            -s workspace-write
            -c approval_policy=never
            -c model=gpt-5.5
            -c model_reasoning_effort=high
            -C "$WORKING_DIR"
            "$PROMPT"
        )
        "$codex_bin" "${codex_args[@]}" </dev/null
    elif [ "$engine" = "claude" ]; then
        HARNESS_MODE=1 claude -p "$PROMPT" --model opus --effort max --dangerously-skip-permissions
    else
        echo "FATAL: unsupported engine '$engine' (expected codex or claude)" >&2
        return 2
    fi
}

# Print the review-contract preamble followed by the full spec, wrapped in a
# ```markdown fence.
# Arguments: $1 - path of the BUILD_SPEC whose content is embedded
build_convergence_review_contract_block() {
    local spec_file="$1"
    printf '%s\n' \
        'CONVERGENCE REVIEW CONTRACT' \
        "Review contract file: $spec_file" \
        'You MUST verify each BUILD_SPEC requirement is actually implemented in the diff.' \
        'You MUST flag any test that mocks/stubs/fakes the exact thing under test, including a faked output filename or a stubbed layer above the code being verified.' \
        '' \
        'BUILD_SPEC content:' \
        '```markdown'
    cat "$spec_file"
    printf '%s\n' '```'
}

# Print the Claude review prompt: the "/code-review high --fix" command line,
# a blank line, then the convergence review contract block for the spec.
# Arguments: $1 - path of the BUILD_SPEC
build_claude_convergence_review_prompt() {
    local spec_file="$1"
    printf '%s\n' '/code-review high --fix' ''
    build_convergence_review_contract_block "$spec_file"
}

# Print the engine ("codex" or "claude") a phase should be dispatched to.
# The declaration is looked for in the first five lines of the phase section;
# when none is found, $DEFAULT_CODER is used.
# Globals:   DEFAULT_CODER (read)
# Arguments: $1 - phase ID
# Returns:   0 with the engine on stdout (no trailing newline); 2 with a FATAL
#            message on stderr when the name maps to no supported engine
get_phase_engine() {
    local phase="$1"
    local header declared

    # Asterisks are dropped up front so "**engine:** claude *(trivial)*"
    # reads the same as "engine: claude (trivial)".
    header="$(get_phase_content "$phase" | sed -n '1,5p' | sed -E 's/\*//g')"

    # Split each line on ":"; the first line whose trimmed key is "engine"
    # (any case) decides. Its value loses any "(...)" annotation and everything
    # after the first token, and is lower-cased.
    declared="$(printf '%s\n' "$header" | awk -F: '
        { gsub(/^[ \t]+|[ \t]+$/, "", $1) }
        tolower($1) == "engine" {
            value = $2
            sub(/\(.*/, "", value)
            gsub(/^[ \t]+|[ \t]+$/, "", value)
            sub(/[ \t].*/, "", value)
            print tolower(value)
            exit
        }
    ')"
    : "${declared:=$DEFAULT_CODER}"

    # Claude-family model names collapse to "claude"; "codex" stays as is.
    # Anything else (e.g. gemini) fails closed rather than being silently
    # routed to a different engine.
    if [ "$declared" = "codex" ]; then
        printf '%s' "codex"
        return 0
    fi
    case "$declared" in
        opus|sonnet|haiku|claude)
            printf '%s' "claude"
            return 0
            ;;
    esac
    echo "FATAL: unsupported engine '$declared' in Phase $phase" >&2
    return 2
}

# Append the fixed "Codex execution discipline" footer (a blank line, the
# title line, then the rules line) to the file named by $1.
append_codex_footer() {
    local target="$1"
    printf '\n%s\n%s\n' \
        'Codex execution discipline:' \
        'Edit files directly in the working tree; do not print code blocks. Do NOT modify the validation gate, test fixtures, or acceptance assertions named in this spec — they are frozen; if a gate looks wrong, STOP and say so, do not edit it. Do NOT run git commit — the orchestrator commits after it validates.' \
        >> "$target"
}

# Print the spec with fenced code blocks removed: every line starting with
# three backticks toggles "inside a fence", and neither fence lines nor the
# lines between them are printed.
# Globals: SPEC_FILE (read)
spec_without_fences() {
    awk '
        /^```/ { fenced = (fenced ? 0 : 1); next }
        fenced == 0
    ' "$SPEC_FILE"
}

# Print the spec lines that sit under a "files to <verb>" heading whose verb is
# NOT a known read-only verb, i.e. the lines that declare what a build modifies.
# Input is the spec with fenced code blocks removed (spec_without_fences).
# A section opens at such a heading and closes at the next heading of any
# level; lines containing "targeted path matches" are dropped from the output.
dispatch_target_lines() {
    # ONLY the authoritative "Files to create/modify" sections declare what a phase
    # MODIFIES. Requirements prose may CITE a dispatch file for context (e.g. a
    # docs build documenting execution/providers/flora.py, or "per model_profiles.json")
    # without touching it — scanning Requirements caused false-positive audit blocks.
    #
    # The section opener matches the "Files to create/modify" heading at ANY level
    # (## / ### / ####) with ANY prefix and ANY capitalization — real specs write
    # `### AREA 5. Files to modify`, `## Files to modify`, `#### Files to create`,
    # `### New files to create`, `### Files to Modify`, `## FILES TO MODIFY`,
    # `### Files to create/modify`, `### Files to modify (3 files)`, etc. Anchoring to
    # a bare lowercase `### Files to modify` silently skipped every prefixed / other-
    # level / other-CASE variant, so a genuine flora.py declared under such a heading
    # escaped the gate once the Requirements backstop was removed (the 5cbf2e76
    # weakening). Matching is via tolower($0) so ALL capitalizations are covered.
    #
    # This gate is FAIL-CLOSED: a MISSED files-heading (false-negative) ships an
    # unaudited dispatch change — the dangerous direction — whereas an over-matched
    # heading only over-blocks (annoying, recoverable). So the opener is intentionally
    # GENEROUS: a plain substring match with NO tail anchor and NO heading-class
    # carve-out. Any text-based carve-out (e.g. "skip `## Phase N` headings") would
    # itself be a DODGE — a spec could hide a real `flora.py` declaration under the
    # excluded heading and ship unaudited — so we deliberately do not add one. The
    # only cost is a theoretical over-block on a heading that LITERALLY contains
    # "files to create/modify" (no real spec does — `^## phase.*files to (create|
    # modify)` matches nothing in the repo), which is the safe direction anyway.
    # The closer matches ANY heading so an opened section always ends at the next
    # heading and can never absorb a later prose/Requirements section — that (not
    # heading exclusion) is what preserves the 5cbf2e76 docs false-positive fix.
    #
    # Heading detection uses `^[[:space:]]*#+[[:space:]]` — legal CommonMark ATX
    # headings allow up to 3 leading spaces AND a TAB (not just a space) after the
    # `#`s. A literal-space `^#+ ` silently missed `###\tFiles to modify` and
    # `   ### Files to modify` → 0 targets → another never-weaken slip. Matching any
    # leading/separating whitespace closes that (fail-closed: over-inclusive is safe).
    #
    # The verb set is a DENYLIST, not an allowlist: a heading gates if its verb is
    # anything OTHER than a known read-only/context verb. An allowlist of write-verbs
    # is incomplete by construction — unbounded modification verbs (rename, patch,
    # migrate, regenerate, add, remove, wire, rework, ...) would each be a never-weaken
    # bypass. Read verbs are a small enumerable set, so we gate everything EXCEPT
    # read/review/look/verify/reference/inspect/check/audit/keep/list/examine/see.
    #
    # Classification keys on the FIRST "files to <verb>" phrase, extracted with match()
    # so it is tied to that one occurrence: a later parenthetical like
    # `### Files to modify (files to read first)` cannot flip a modify-heading to
    # read-only (Codex R5 CRITICAL — a non-anchored negative regex matched the later
    # "files to read"). Internal whitespace is [[:space:]]+, so a TAB or double space
    # inside `Files  to  modify` still parses (R5 MAJOR). A spec that merely READS
    # flora.py for context does not touch the dispatch path, so the read-only verbs are
    # excluded (the citation false-positive 5cbf2e76 fixed); ANY other first verb (incl.
    # future ones) gates — fail-closed. KNOWN MARGINAL EDGE: a mixed
    # `### Files to read and modify` heading (read-only verb FIRST, write verb after) is
    # classified read-only; no corpus spec mixes read+write verbs in one heading, so it
    # is left as a noted limitation rather than over-engineered into per-token parsing.
    #
    # Reading the awk program below: every heading line first closes any open
    # section (in_files = 0) and then reopens one only when its first
    # "files to <verb>" phrase carries a non-read-only verb. Heading lines
    # themselves are never printed (`next`); only non-heading lines are.
    spec_without_fences | awk '
        {
            h = tolower($0)
            if (h ~ /^[[:space:]]*#+[[:space:]]/) {                       # any ATX heading (also closes a section)
                in_files = 0
                if (match(h, /files[[:space:]]+to[[:space:]]+[a-z]+/)) {   # first "files to <verb>"
                    verb = substr(h, RSTART, RLENGTH)
                    sub(/^files[[:space:]]+to[[:space:]]+/, "", verb)      # isolate <verb>
                    if (verb !~ /^(read|review|look|verify|reference|inspect|check|audit|keep|list|examine|see)$/)
                        in_files = 1                                       # modification verb → open the file list
                }
                next
            }
            if (in_files && $0 !~ /targeted path matches/) print
        }
    '
}

# Print, one per line, the dispatch-touching paths named in the spec's
# declared file lists (dispatch_target_lines). Three extractions are
# concatenated and then filtered:
#   1. monorepo-root form:  recoil/(pipeline|execution|core)/...
#   2. subproject form:     (pipeline|execution|core)/... at a path boundary
#   3. bare file names:     PROMPT_BIBLE.yaml, model_profiles.json
# Docs and tests are filtered out at the end (*.md, /test/ or /tests/
# directories, test_* and *_test.* names, /docs/) because they live under
# those trees without touching the live dispatch path. NEVER-WEAKEN: real code
# and config targets must still be reported.
# Always returns 0; finding nothing is not an error.
get_dispatch_touching_targets() {
    local declared
    declared="$(dispatch_target_lines)"
    {
        grep -Eo 'recoil/(pipeline|execution|core)/[^`" )]+' <<<"$declared" || true
        # The leading boundary character stops this from grabbing the tail of
        # an unrelated path (e.g. lib/pipeline/...); sed then removes that one
        # leading non-letter from each match.
        grep -Eo '(^|[^A-Za-z0-9_./-])(pipeline|execution|core)/[^`" )]+' <<<"$declared" \
            | sed -E 's/^[^A-Za-z]//' || true
        grep -Eo 'PROMPT_BIBLE\.yaml|model_profiles\.json' <<<"$declared" || true
    } | grep -Ev '(\.md([^A-Za-z0-9]|$)|/tests?/|(^|/)test_|_test\.|/docs/)' || true
}

# Succeed (return 0) only when the LAST executable phase of the spec runs
# audit_dispatch.py through python3.
# Globals: SPEC_FILE (read)
has_real_audit_phase() {
    # The RAW spec is scanned, not the fence-stripped one: the audit command
    # normally sits inside a ```bash fence, which fence-stripping would delete
    # and so hide a genuine audit phase. Phase headings are not expected inside
    # fences, so block boundaries are unaffected.
    #
    # Only the final phase counts, because the audit exists to validate the
    # finished dispatch path; an audit in an earlier phase says nothing about
    # the end state.
    #
    # The command match accepts python3 anywhere on the line (after start of
    # line, whitespace, ";", "&" or "|"), so prefixes such as
    # `PYTHONPATH=. python3 ...` or `cd ... && python3 ...` still count, while a
    # bare mention of audit_dispatch.py without python3 does not.
    #
    # A phase block is opened only by a heading the phase-ID regex accepts
    # (digits, optional ".digits", optional letter). Other "## Phase ..."
    # headings, e.g. a "## Phase N+1" placeholder, are never executed, so they
    # are handled by the generic "## " rule and merely close the current block.
    awk '
        # Fold the block that is ending (if any) into the running verdict.
        function settle() { if (open) verdict = seen }

        /^## Phase [0-9]+(\.[0-9]+)?[a-z]?[: ]/ { settle(); open = 1; seen = 0; next }
        /^## / { settle(); open = 0 }
        open && /(^|[[:space:];&|])python3[[:space:]].*audit_dispatch\.py/ { seen = 1 }
        END { settle(); exit(verdict ? 0 : 1) }
    ' "$SPEC_FILE"
}

# Pre-flight gate: block the build (exit 1) when the spec declares
# dispatch-touching targets but its final phase does not run the dispatch audit.
# Globals: TMP_OUT (written), BUILD_LOG (appended to only if it already exists),
#          CURRENT_PHASE, LAST_GATE, LAST_VALIDATION_COMMAND (written on block)
# Returns: 0 when nothing is flagged or an audit phase exists; otherwise the
#          script EXITS with status 1 after reporting the flagged targets
run_dispatch_audit_gate() {
    local targets target flagged
    flagged=""
    # One trailing punctuation character (. , ; :) is trimmed from each target.
    targets="$(get_dispatch_touching_targets | sed 's/[.,;:]$//' | sort -u)"
    # Targets are spec-derived text and must never be glob-expanded: an
    # unquoted `for target in $targets` would turn e.g. "pipeline/*.py" into
    # whatever files happen to exist in the cwd. Whitespace still separates
    # tokens (blanks/tabs are turned into newlines), but each token is read
    # literally.
    while IFS= read -r target; do
        [ -n "$target" ] || continue
        # Exempt the harness substrate itself. Specs author this either from the
        # monorepo git root (recoil/pipeline/...) or relative to the recoil
        # subproject root (pipeline/...) — get_dispatch_touching_targets emits
        # whichever form the spec uses, so both must be exempted.
        if [ "$target" = "recoil/pipeline/tools/harness_orchestrator.sh" ] \
           || [ "$target" = "pipeline/tools/harness_orchestrator.sh" ]; then
            continue
        fi
        flagged="${flagged}${target}
"
    done < <(printf '%s\n' "$targets" | tr ' \t' '\n\n')

    if [ -n "$flagged" ] && ! has_real_audit_phase; then
        echo "BUILD BLOCKED: dispatch-audit phase required for dispatch-touching targets." >&2
        printf '%s' "$flagged" >&2
        CURRENT_PHASE="preflight"
        LAST_GATE="dispatch-audit"
        LAST_VALIDATION_COMMAND=""
        printf 'Dispatch-audit phase required for:\n%s\n' "$flagged" > "$TMP_OUT"
        record_failure_reason 1 "$TMP_OUT" "dispatch-audit phase required"
        # Only record to the build log if it already exists. A --dry-run preflight
        # runs this gate BEFORE the log is created (the log-init block is real-build
        # only) — writing here would spawn a stray build log for a pure diagnostic.
        # The refusal + exit 1 (the fail-closed behavior) still fire in both modes.
        [ -n "$BUILD_LOG" ] && [ -f "$BUILD_LOG" ] && {
            printf '\n## BUILD BLOCKED\nDispatch-audit phase required for:\n%s\n' "$flagged" >> "$BUILD_LOG"
            append_failure_reason_to_log 1 "$TMP_OUT" "dispatch-audit phase required"
        }
        exit 1
    fi
}

run_codex_spec_review_gate() {
    if $SKIP_SPEC_REVIEW; then
        return 0
    fi
    if $DRY_RUN; then
        echo "Codex spec review: SKIPPED (dry-run)"
        return 0
    fi

    local CODEX
    CODEX="$(resolve_codex_binary)" || {
        printf '\n## Codex Spec Review\nCodex spec review: SKIPPED (codex not found)\n' >> "$BUILD_LOG"
        return 0
    }

    local prompt review_output verdict
    local attempt max_attempts codex_exit byte_count last_meaningful assistant_output
    local degenerate degenerate_reason backoff

    prompt="Adversarial spec-review of $SPEC_FILE. Emit CRITICAL/MAJOR/MINOR findings; the FINAL line MUST be exactly 'VERDICT: READY' or 'VERDICT: NEEDS-FIXES'."
    max_attempts=3
    attempt=1
    review_output=""
    verdict=""
    degenerate_reason=""

    # codex exec INTERMITTENTLY aborts before producing model output (only the
    # session header + echoed prompt; no assistant turn) — an invocation-level
    # degeneracy unrelated to the spec. The old single-shot fail-closed turned
    # that into a SPURIOUS BUILD BLOCK. Retry on a detectably-degenerate response;
    # honor the verdict only on a non-degenerate one. (codex self-authored the
    # fix from its own degenerate-output evidence, 2026-06-17.)
    while [ "$attempt" -le "$max_attempts" ]; do
        dispatch_budget_add_round

        review_output="$("$CODEX" exec \
            --skip-git-repo-check \
            -s read-only \
            -c model=gpt-5.5 \
            -c model_reasoning_effort=high \
            -C "$WORKING_DIR" \
            "$prompt" </dev/null 2>&1)"
        codex_exit=$?

        printf '\n## Codex Spec Review (attempt %s/%s)\nCodex exit: %s\n%s\n' \
            "$attempt" "$max_attempts" "$codex_exit" "$review_output" >> "$BUILD_LOG"

        verdict="$(printf '%s\n' "$review_output" | grep -E '^VERDICT:' | tail -n 1 || true)"
        byte_count="$(printf '%s' "$review_output" | wc -c | tr -d '[:space:]')"
        last_meaningful="$(printf '%s\n' "$review_output" | awk 'NF { line=$0 } END { print line }')"
        assistant_output="$(printf '%s\n' "$review_output" | awk -v prompt="$prompt" '
            $0 == prompt { found=1; buf=""; next }
            found { buf = buf $0 "\n" }
            END { printf "%s", buf }
        ')"

        degenerate=false
        degenerate_reason=""

        if [ -z "$verdict" ]; then
            degenerate=true
            degenerate_reason="no VERDICT line"
        elif [ "${byte_count:-0}" -lt 4096 ] && [ "$last_meaningful" = "$prompt" ]; then
            degenerate=true
            degenerate_reason="header/prompt echo only"
        elif [ "${byte_count:-0}" -lt 4096 ] \
             && ! printf '%s\n' "$assistant_output" | grep -Eq '^[[:space:]]*(CRITICAL|MAJOR|MINOR|VERDICT):'; then
            degenerate=true
            degenerate_reason="no assistant review markers after prompt echo"
        fi

        if $degenerate; then
            if [ "$attempt" -lt "$max_attempts" ]; then
                backoff=$((attempt * 2))
                printf 'Codex spec review attempt %s/%s was degenerate (%s); retrying after %ss.\n' \
                    "$attempt" "$max_attempts" "$degenerate_reason" "$backoff" >> "$BUILD_LOG"
                sleep "$backoff"
                attempt=$((attempt + 1))
                continue
            fi
            break
        fi

        # Fail CLOSED: only an explicit 'VERDICT: READY' from a non-degenerate
        # response lets the build proceed. A substantive NEEDS-FIXES verdict must
        # block immediately; retries are only for invocation-degenerate output.
        if ! printf '%s\n' "$verdict" | grep -qE '^VERDICT:[[:space:]]*READY[[:space:]]*$'; then
            echo "BUILD BLOCKED: codex spec-review did not return 'VERDICT: READY' (got: '${verdict:-<none>}')." >&2
            printf '%s\n' "$review_output" >&2
            CURRENT_PHASE="preflight"
            LAST_GATE="codex-spec-review"
            LAST_VALIDATION_COMMAND=""
            printf '%s\n' "$review_output" > "$TMP_OUT"
            record_failure_reason 1 "$TMP_OUT" "codex spec-review did not return VERDICT: READY"
            exit 1
        fi

        return 0
    done

    echo "BUILD BLOCKED: codex spec-review did not produce a non-degenerate 'VERDICT: READY' after $max_attempts attempts (last: ${degenerate_reason:-unknown})." >&2
    printf '%s\n' "$review_output" >&2
    CURRENT_PHASE="preflight"
    LAST_GATE="codex-spec-review"
    LAST_VALIDATION_COMMAND=""
    printf '%s\n' "$review_output" > "$TMP_OUT"
    record_failure_reason 1 "$TMP_OUT" "codex spec-review degenerate after $max_attempts attempts"
    exit 1
}

# SHOULD_FIX #11: Kill the process group, not just the wrapper PID.
#
# Run a command under a wall-clock limit, capturing its stdout+stderr in
# "$TMP_OUT" (the file is overwritten on every call).
#
# Globals:   TMP_OUT (written)
# Arguments: $1  - limit in seconds (liveness is polled once per second)
#            $2+ - the command and its arguments
# Returns:   the command's own exit status, or 124 when it was still running
#            at the limit and had to be killed (a timeout notice is then
#            appended to "$TMP_OUT").
run_with_timeout() {
    local timeout=$1
    shift
    # Run in its own process group via setsid (macOS has no setsid;
    # use perl one-liner as fallback).
    if command -v setsid >/dev/null 2>&1; then
        setsid "$@" > "$TMP_OUT" 2>&1 &
    elif command -v perl >/dev/null 2>&1; then
        # macOS has no setsid: perl setpgrp makes the child its OWN process-group
        # leader (pgid == its pid), so the negative-PID kill on timeout reaps the
        # whole tree (grandchildren / spawned servers), not just direct children.
        perl -e 'setpgrp(0,0); exec @ARGV or die "exec failed: $!"' -- "$@" > "$TMP_OUT" 2>&1 &
    else
        # Last resort: plain background subshell (no dedicated process group, so
        # the group kill below is a no-op and only the direct kills apply).
        ( "$@" ) > "$TMP_OUT" 2>&1 &
    fi
    local pid=$!
    local count=0
    while kill -0 "$pid" 2>/dev/null; do
        sleep 1
        count=$((count + 1))
        # Re-probe liveness AFTER the sleep: a command that finished during the
        # final polling second completed in time, so it must fall through to
        # `wait` and report its real exit status instead of a spurious 124.
        if [ "$count" -ge "$timeout" ] && kill -0 "$pid" 2>/dev/null; then
            # Kill the whole process group (setsid made $pid the group leader, so
            # a negative target hits the group), then fall back to the wrapper +
            # its direct children so grandchildren/servers can't survive the timeout.
            kill -9 "-$pid" 2>/dev/null   # negative = process group
            pkill -P "$pid" 2>/dev/null
            kill -9 "$pid" 2>/dev/null
            wait "$pid" 2>/dev/null
            printf '\n[Error] Command timed out after %ds\n' "$timeout" >> "$TMP_OUT"
            return 124
        fi
    done
    wait "$pid"
    return $?
}

# Remove the orchestrator's scratch files (last command output, phase prompt,
# validation script) and, best-effort, the self-snapshot copy of this script.
cleanup() {
    local scratch
    for scratch in "$TMP_OUT" "$TMP_PROMPT" "$VAL_SCRIPT"; do
        rm -f "$scratch"
    done
    # The snapshot path may be unset/empty; never let that fail the exit path.
    rm -f "${HARNESS_SNAPSHOT_PATH:-}" 2>/dev/null || true
}

# Print the artifact directory of the current attempt: "$RUN_DIR/attempt-NNN",
# where NNN is $ATTEMPT zero-padded to at least three digits.
__attempt_dir() {
    local padded="" rc=0
    printf -v padded '%03d' "$ATTEMPT" || rc=$?
    printf '%s/attempt-%s\n' "$RUN_DIR" "$padded"
    return "$rc"
}

# Print one top-level field of "$RUN_DIR/status.json" (nothing when the field
# is absent or null). Best-effort: always returns 0 and prints nothing when
# RUN_DIR is empty, the file is missing, or the JSON cannot be read.
# Arguments: $1 - field name
__status_field() {
    local field="$1"
    if [ -z "$RUN_DIR" ] || [ ! -f "$RUN_DIR/status.json" ]; then
        return 0
    fi
    python3 - "$RUN_DIR/status.json" "$field" <<'PY' 2>/dev/null || true
import json
import sys

status_path, wanted = sys.argv[1], sys.argv[2]
with open(status_path, "r", encoding="utf-8") as handle:
    value = json.load(handle).get(wanted)
if value is not None:
    print(value)
PY
}

# Write "<attempt dir>/terminal_status.json": a JSON record of how this run
# ended (exit code, phase/gate, converge status, failing test ids, a normalized
# output excerpt, branch/commit, pr_url, log path, timestamp).
#
# Globals read: RUN_DIR, ATTEMPT, WORKING_DIR, push_branch, BUILD_LOG,
#               CONVERGE_STATUS, CURRENT_PHASE, LAST_GATE,
#               LAST_VALIDATION_COMMAND, TERMINAL_CAUSE_HINT,
#               TERMINAL_FAILURE_REASON, TMP_OUT, SCRIPT_DIR
# Arguments:    $1 - the exit code to record
# Returns:      0 immediately when RUN_DIR is empty; otherwise the status of
#               the final mv.
__emit_terminal_status() {
    local exit_code="$1"
    [ -n "$RUN_DIR" ] || return 0

    local attempt_dir terminal_path tmp_path branch commit pr_url log_path work_dir run_id
    attempt_dir="$(__attempt_dir)"
    mkdir -p "$attempt_dir"
    terminal_path="$attempt_dir/terminal_status.json"
    # The payload is built in a PID-suffixed sibling file and only renamed onto
    # terminal_path at the very end of this function.
    tmp_path="$attempt_dir/.terminal_status.json.$$"
    work_dir="${WORKING_DIR:-$(pwd)}"
    # Git lookups are best-effort: outside a repo these simply yield empty strings.
    commit="$(git -C "$work_dir" rev-parse HEAD 2>/dev/null || true)"
    # An explicit push_branch wins over whatever branch is currently checked out.
    branch="${push_branch:-$(git -C "$work_dir" rev-parse --abbrev-ref HEAD 2>/dev/null || true)}"
    # REC dashboard fix (R10b): __emit_terminal_status runs ONLY in dispatch mode
    # (the `[ -n "$RUN_DIR" ] || return 0` guard at the top of this function), where the
    # stamped status.json.pr_url is the authoritative source. Read it directly with NO
    # env $PR_URL fallback — $PR_URL is env-init at :251 and never reset on the
    # gh-not-found / no-commits-ahead converge sub-paths, so consulting it would leak a
    # stale URL into terminal_status.json (the reaper's input). Empty ⇒ no PR (correct).
    pr_url="$(__status_field pr_url)"
    log_path="${BUILD_LOG:-}"
    # Prefer the run_id recorded in status.json; fall back to the run directory name.
    run_id="$(__status_field run_id)"
    [ -n "$run_id" ] || run_id="$(basename "$RUN_DIR")"

    # Everything the embedded Python needs is handed over via TERMINAL_*
    # environment variables scoped to this one python3 invocation; the script
    # receives the temp output path as its only argument.
    TERMINAL_RUN_ID="$run_id" \
    TERMINAL_ATTEMPT="$ATTEMPT" \
    TERMINAL_EXIT_CODE="$exit_code" \
    TERMINAL_CONVERGE_STATUS="$CONVERGE_STATUS" \
    TERMINAL_PHASE="$CURRENT_PHASE" \
    TERMINAL_GATE="$LAST_GATE" \
    TERMINAL_VALIDATION_COMMAND="$LAST_VALIDATION_COMMAND" \
    TERMINAL_CAUSE_HINT="$TERMINAL_CAUSE_HINT" \
    TERMINAL_FAILURE_REASON="$TERMINAL_FAILURE_REASON" \
    TERMINAL_PR_URL="$pr_url" \
    TERMINAL_BRANCH="$branch" \
    TERMINAL_COMMIT="$commit" \
    TERMINAL_LOG_PATH="$log_path" \
    TERMINAL_LAST_OUTPUT="$TMP_OUT" \
    TERMINAL_BUILD_LOG="$log_path" \
    python3 - "$tmp_path" <<'PY'
import datetime as _dt
import json
import os
import re
import sys
from pathlib import Path

out_path = Path(sys.argv[1])

def nullable(value):
    return value if value else None

def read_text(path):
    try:
        return Path(path).read_text(encoding="utf-8", errors="replace")
    except Exception:
        return ""

def normalize(text):
    text = re.sub(r"\b\d{4}-\d{2}-\d{2}[T ][0-9:.]+(?:Z|[+-]\d{2}:?\d{2})?\b", "<timestamp>", text)
    text = re.sub(r"\b\d+(?:\.\d+)?m?s\b", "<duration>", text)
    text = re.sub(r"(?<!\S)/(?:tmp|var/folders)/[^\s\"']+", "<tmp-path>", text)
    text = re.sub(r"\bpid\s+\d+\b", "pid <pid>", text, flags=re.I)
    text = re.sub(r"\b0x[0-9a-fA-F]+\b", "<hex>", text)
    text = re.sub(r"\b\d+\s+tokens?\b", "<tokens>", text, flags=re.I)
    text = re.sub(r":\d+:", ":<line>:", text)
    lines = [line.rstrip() for line in text.splitlines() if line.strip()]
    return "\n".join(lines[-40:])

last_output = read_text(os.environ.get("TERMINAL_LAST_OUTPUT", ""))
build_log = read_text(os.environ.get("TERMINAL_BUILD_LOG", ""))
combined = "\n".join(part for part in [last_output, build_log] if part)

failing = []
for line in last_output.splitlines():
    stripped = line.strip()
    if stripped.startswith("FAILED "):
        token = stripped.split()[1] if len(stripped.split()) > 1 else stripped
        failing.append(token)
    for match in re.findall(r"([A-Za-z0-9_./-]+::[A-Za-z0-9_:-]+)", stripped):
        failing.append(match)
    for match in re.findall(r"\b(test_[A-Za-z0-9_]+)\b", stripped):
        failing.append(match)
seen = set()
failing_test_ids = []
for item in failing:
    if item not in seen:
        seen.add(item)
        failing_test_ids.append(item)

verdicts = re.findall(r"VERDICT:\s*(CONVERGED|NEEDS-FIXES|READY|FAILED|BLOCKED)", combined)
verdict_summary = f"VERDICT: {verdicts[-1]}" if verdicts else ""

exit_code = int(os.environ["TERMINAL_EXIT_CODE"])
converge_status = os.environ.get("TERMINAL_CONVERGE_STATUS") or "UNKNOWN"
cause = os.environ.get("TERMINAL_CAUSE_HINT") or "unknown"
if cause == "unknown":
    if converge_status == "CAPPED":
        cause = "convergence_capped"
    elif exit_code != 0 and os.environ.get("TERMINAL_VALIDATION_COMMAND"):
        cause = "validation_failure"
    elif exit_code != 0:
        cause = "unknown"

failure_reason = os.environ.get("TERMINAL_FAILURE_REASON") or ""
if not failure_reason and exit_code != 0:
    gate = os.environ.get("TERMINAL_GATE") or os.environ.get("TERMINAL_PHASE") or "unknown"
    parts = [
        f"Gate/step: {gate}",
        f"Phase: {os.environ.get('TERMINAL_PHASE') or 'preflight'}",
        f"Exit code: {exit_code}",
    ]
    validation_command = os.environ.get("TERMINAL_VALIDATION_COMMAND") or ""
    if validation_command:
        parts.append(f"Validation command: {validation_command}")
    excerpt = normalize(last_output)
    parts.append("Output tail:\n" + excerpt if excerpt else "Output tail: <no captured output>")
    failure_reason = "\n".join(parts)

payload = {
    "run_id": os.environ.get("TERMINAL_RUN_ID") or None,
    "attempt": int(os.environ.get("TERMINAL_ATTEMPT") or "1"),
    "exit_code": exit_code,
    "converge_status": converge_status,
    "phase": os.environ.get("TERMINAL_PHASE") or "preflight",
    "gate": nullable(os.environ.get("TERMINAL_GATE", "")),
    "validation_command": nullable(os.environ.get("TERMINAL_VALIDATION_COMMAND", "")),
    "failing_test_ids": failing_test_ids,
    "convergence_verdict_summary": verdict_summary,
    "normalized_failure_excerpt": normalize(last_output),
    "failure_reason": nullable(failure_reason),
    "failure_signature": None,
    "cause_hint": cause,
    "pr_url": nullable(os.environ.get("TERMINAL_PR_URL", "")),
    "branch": os.environ.get("TERMINAL_BRANCH") or None,
    "commit": os.environ.get("TERMINAL_COMMIT") or None,
    "log_path": nullable(os.environ.get("TERMINAL_LOG_PATH", "")),
    "written_at": _dt.datetime.now(_dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z"),
}
out_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY

    # For any run that did not converge, ask dispatch_status.py to derive a
    # signature from the payload just written and, when it returns one, patch it
    # into the "failure_signature" field (written as null above). A failing or
    # silent helper simply leaves the field null.
    if [ "$CONVERGE_STATUS" != "CONVERGED" ]; then
        local signature
        signature="$(python3 "$SCRIPT_DIR/dispatch_status.py" signature --terminal-status "$tmp_path" 2>/dev/null || true)"
        if [ -n "$signature" ]; then
            python3 - "$tmp_path" "$signature" <<'PY'
import json
import sys
from pathlib import Path

path = Path(sys.argv[1])
data = json.loads(path.read_text(encoding="utf-8"))
data["failure_signature"] = sys.argv[2]
path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
PY
        fi
    fi
    # Publish: move the finished temp file onto its final name.
    mv "$tmp_path" "$terminal_path"
}

# EXIT handler for dispatch mode: record the terminal status for the given
# exit code first (it reads the scratch output file), then remove scratch files.
# Arguments: $1 - the script's exit code
__on_dispatch_exit() {
    __emit_terminal_status "$1"
    cleanup
}

# Update "<attempt dir>/heartbeat.json" with the phase (and optionally round)
# the orchestrator is currently working on. No-op when RUN_DIR is empty.
#
# Globals read: RUN_DIR, ATTEMPT, BUILD_LOG
# Arguments:    $1 - phase hint (stored as "phase_hint")
#               $2 - optional round hint; stored as an integer only when it
#                    consists solely of digits, otherwise stored as null
# Returns:      0 from the guard or the Python step (whose failures are
#               swallowed by `2>/dev/null || true`).
dispatch_heartbeat_hint() {
    [ -n "$RUN_DIR" ] || return 0
    local phase_hint="$1"
    local round_hint="${2:-}"
    local attempt_dir heartbeat_path status_path
    attempt_dir="$(__attempt_dir)"
    mkdir -p "$attempt_dir"
    heartbeat_path="$attempt_dir/heartbeat.json"
    status_path="$RUN_DIR/status.json"
    # Inputs reach the embedded Python through HEARTBEAT_* environment variables
    # scoped to this one invocation. The script merges into any existing
    # heartbeat file (starting from an empty object if it is missing, unreadable
    # or not a JSON object), fills run_id/attempt/log_path only when absent,
    # always overwrites phase_hint/round_hint/updated_at, and writes through a
    # temp file that is then renamed over heartbeat.json.
    HEARTBEAT_PHASE_HINT="$phase_hint" \
    HEARTBEAT_ROUND_HINT="$round_hint" \
    HEARTBEAT_ATTEMPT="$ATTEMPT" \
    HEARTBEAT_LOG_PATH="${BUILD_LOG:-}" \
    HEARTBEAT_STATUS_PATH="$status_path" \
    python3 - "$heartbeat_path" <<'PY' 2>/dev/null || true
import datetime as _dt
import json
import os
import sys
from pathlib import Path

path = Path(sys.argv[1])
try:
    data = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        data = {}
except Exception:
    data = {}
try:
    status = json.loads(Path(os.environ["HEARTBEAT_STATUS_PATH"]).read_text(encoding="utf-8"))
except Exception:
    status = {}
data.setdefault("run_id", status.get("run_id"))
data.setdefault("attempt", int(os.environ.get("HEARTBEAT_ATTEMPT") or status.get("attempt") or 1))
data.setdefault("log_path", os.environ.get("HEARTBEAT_LOG_PATH") or None)
data["phase_hint"] = os.environ.get("HEARTBEAT_PHASE_HINT") or None
round_hint = os.environ.get("HEARTBEAT_ROUND_HINT") or ""
data["round_hint"] = int(round_hint) if round_hint.isdigit() else None
data["updated_at"] = _dt.datetime.now(_dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
tmp = path.with_name(f".{path.name}.{os.getpid()}.tmp")
tmp.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n", encoding="utf-8")
tmp.replace(path)
PY
}

# Charge one round against the dispatch budget (dispatch mode only).
# Returns 0 when RUN_DIR is empty or the budget check succeeds. When the check
# fails, marks the run CAPPED / budget_exceeded and terminates the script
# with exit status 1.
dispatch_budget_add_round() {
    if [ -z "$RUN_DIR" ]; then
        return 0
    fi
    if python3 "$SCRIPT_DIR/dispatch_status.py" budget-check --run-dir "$RUN_DIR" --add-rounds 1 >/dev/null 2>&1; then
        return 0
    fi
    CONVERGE_STATUS="CAPPED"
    TERMINAL_CAUSE_HINT="budget_exceeded"
    export CONVERGE_STATUS
    echo ">> ERROR: dispatch round budget exceeded — stopping." >&2
    exit 1
}

# Dispatch mode (RUN_DIR set): on exit, hand the script's exit code to
# __on_dispatch_exit.
[ -z "$RUN_DIR" ] || trap '__on_dispatch_exit $?' EXIT

# SHOULD_FIX #6: Build the "what already exists" context from completed phases.
#
# Print a comma-separated list of every phase whose status is PASS, in
# PHASE_IDS order, e.g. "Phase 1 (Setup), Phase 2a (Build UI)"; prints "None"
# when no phase has passed. No trailing newline is emitted.
#
# Globals: PHASE_IDS (read)
# Outputs: the summary on stdout
get_completed_phases_summary() {
    # The loop variable is local so a direct (non-subshell) call cannot clobber
    # a caller's or global "p".
    local summary="" p pname
    for p in "${PHASE_IDS[@]}"; do
        [ "$(get_phase_status "$p")" = "PASS" ] || continue
        pname="$(get_phase_name "$p")"
        # ${summary:+...} adds the ", " separator only once something is listed.
        summary="${summary:+$summary, }Phase $p ($pname)"
    done
    printf '%s' "${summary:-None}"
}

# Print the dependency IDs of a phase, one per line (nothing when it has none).
# Only the FIRST line of the phase content mentioning "depends_on" is used, and
# any parenthetical on it is dropped before parsing.
#   - Explicit form: every numeric token (optionally fractional and/or
#     letter-suffixed, e.g. 2, 1.5, 2a) on that line is a dependency.
#   - Textual form "all preceding/prior/previous phases": expands to every ID
#     listed BEFORE this phase in PHASE_IDS. Without this expansion the textual
#     form would yield an empty list and a mandatory final-audit phase could run
#     before earlier phases PASS (a gate weakening).
# Requires PHASE_IDS to be populated.
# Arguments: $1 - phase ID
resolve_deps() {
    local phase="$1"
    local dep_line earlier
    dep_line="$(get_phase_content "$phase" | grep -iE 'depends_on' | head -1 | sed -E 's/\(.*\)//')"

    # Explicit list: pull out the ID tokens; "no match" is not an error.
    if ! printf '%s' "$dep_line" | grep -qiE 'all[[:space:]]+(preceding|prior|previous)'; then
        printf '%s' "$dep_line" | grep -oE '[0-9]+(\.[0-9]+)?[a-z]?' || true
        return 0
    fi

    # Textual "all preceding" form: emit IDs until this phase itself is reached.
    for earlier in "${PHASE_IDS[@]}"; do
        [ "$earlier" = "$phase" ] && break
        printf '%s\n' "$earlier"
    done
    return 0
}

# ── PARSING ───────────────────────────────────────────────────────
# MUST_FIX #1: Enumerate actual phase IDs from the document, including
# fractional IDs (1.5, 1.6) and letter-suffixed IDs ('0a', '1.5b'). Existing
# specs (fountain-editor: 0a..5b, console-v2: 1.5b) use the suffix form, so it
# must parse — not be skipped. The heading separator may be ':' OR whitespace
# (covers '## Phase 1: Title' and '## Phase 1 — Title'). Alpha-only IDs
# ('## Phase A:') are still NOT parsed — the integrity guard below makes those
# fail LOUD (abort), never silently skip.

# PHASE_IDS holds the phase IDs in the order their headings appear in the spec.
# grep selects the "## Phase <id>" headings and sed reduces each to its bare ID.
# NOTE: "pid" below is just the loop variable carrying a phase ID — it is not a
# process ID. (A read loop is used instead of mapfile, presumably for the
# bash 3.2 compatibility promised in the file header.)
PHASE_IDS=()
while IFS= read -r pid; do
    PHASE_IDS+=("$pid")
done < <(grep -E '^## Phase [0-9]+(\.[0-9]+)?[a-z]?[: ]' "$SPEC_FILE" \
         | sed -E 's/^## Phase ([0-9]+(\.[0-9]+)?[a-z]?)[: ].*/\1/')

# A spec with no parseable phase heading is unusable: abort.
TOTAL_PHASES=${#PHASE_IDS[@]}
if [ "$TOTAL_PHASES" -eq 0 ]; then
    echo "FATAL: No phases found in $SPEC_FILE" >&2
    exit 1
fi

# Integrity guard: compare parsed count vs declared count.
# First look for a markdown table row of the form "| **Phases** | N"; only the
# first occurrence in the file is used.
declared="$(grep -oE '\| *\*\*Phases\*\* *\| *[0-9]+' "$SPEC_FILE" | grep -oE '[0-9]+$' | head -1)"
if [ -z "$declared" ]; then
    # Also try "**Phases:** N" format
    declared="$(grep -oE '\*\*Phases[:\*]*\*\*[[:space:]]*[0-9]+' "$SPEC_FILE" | grep -oE '[0-9]+$' | head -1)"
fi
# When the spec declares no count at all, the guard is skipped entirely.
# The comparison is textual, so the declared number must be written exactly as
# the parsed count prints (no leading zeros).
if [ -n "$declared" ] && [ "$declared" != "$TOTAL_PHASES" ]; then
    # The integrity guard exists to catch SILENTLY SKIPPED phases (parser found
    # fewer than declared). The established convention adds a mandatory final
    # dispatch-audit phase ON TOP of the declared implementation count, so a spec
    # legitimately parses to declared+1. Allow that single extra phase ONLY when
    # it is a real final audit phase (has_real_audit_phase); every other mismatch
    # — including any shortfall — still fails LOUD so phases can't be skipped.
    if [ "$TOTAL_PHASES" -eq "$((declared + 1))" ] && has_real_audit_phase; then
        echo ">> Spec declares $declared phases; parser found $TOTAL_PHASES (declared+1 mandatory dispatch-audit phase). OK."
    else
        echo "FATAL: Spec declares $declared phases but parser found $TOTAL_PHASES." >&2
        echo "Phase IDs found: ${PHASE_IDS[*]}" >&2
        echo "Aborting to avoid silently skipping phases." >&2
        exit 1
    fi
fi

echo "Parsed $TOTAL_PHASES phases: ${PHASE_IDS[*]}"

# Optional "still alive" ping sent after a phase passes. Active only when
# DISPATCH_NOTIFY_PHASES=1 AND the run is in dispatch mode (RUN_DIR set).
# The ping is low-priority and ntfy-only so it lands silently in the
# notification list (no buzz, no Linear spam) — the terminal PR/cap ping stays
# the loud one. Best-effort: a failing notifier never fails the build.
# Arguments: $1 - phase ID, $2 - phase name
notify_phase_progress() {
    if [ "${DISPATCH_NOTIFY_PHASES:-}" != "1" ] || [ -z "${RUN_DIR:-}" ]; then
        return 0
    fi
    local phase="$1" name="$2"
    local passed_total=0 p
    # Count how many phases are PASS right now for the "(done/total)" fragment.
    for p in "${PHASE_IDS[@]}"; do
        if [ "$(get_phase_status "$p")" = "PASS" ]; then
            passed_total=$((passed_total + 1))
        fi
    done
    bash "$SCRIPT_DIR/dispatch_notify.sh" notify \
        --run-dir "$RUN_DIR" \
        --event phase_passed \
        --message "$SESSION_NAME > Phase $phase PASS ($passed_total/$TOTAL_PHASES) - $name" \
        --priority low \
        --ntfy-only >/dev/null 2>&1 || true
}

# ── PRE-BUILD GATES ───────────────────────────────────────────────

# Real (non-dry-run) builds only: show the EP001 notice on stderr, archive a
# finished prior build log, make sure a build log exists, then prepend the
# notice to it. Dry-run skips all of this and writes nothing here.
if ! $DRY_RUN; then
    EP001_BANNER="⚠ EP001 runtime spend-gates (payload dry-run / receipt diff / disk reconciliation / visual canary) are NOT in this substrate. SAFE for code/tooling builds. Do NOT run a \$-spend visual-production loop through it — use the gated in-skill harness path until v2."
    echo "$EP001_BANNER" >&2
    # A build log from a TERMINAL prior run (complete/blocked/failed) is stale
    # for a fresh re-run — archive it and start clean so old PASS/FAIL entries
    # can't skip phases or immediately block on stale failures. A log with NO
    # terminal marker (a crashed mid-build) is left in place to RESUME.
    if [ -f "$BUILD_LOG" ] && grep -qE '^## BUILD (COMPLETE|BLOCKED|FAILED)' "$BUILD_LOG"; then
        # The archive name carries a second-resolution timestamp suffix.
        mv "$BUILD_LOG" "${BUILD_LOG}.stale-$(date +%Y%m%d-%H%M%S)"
        echo ">> Archived a terminal prior build log; starting a fresh build." >&2
    fi
    # No log (first run, or just archived): start one with a title and start time.
    if [ ! -f "$BUILD_LOG" ]; then
        printf '# Build Log — %s\n**Started:** %s\n' "$SESSION_NAME" "$(date)" > "$BUILD_LOG"
    fi
    # Prepend the banner: write banner + current log into a PID-suffixed temp
    # file, then move that file over the log.
    # NOTE(review): this runs on every non-dry-run start, including resumes of
    # an existing log, so a resumed log gains one more banner copy per start —
    # confirm that is intended.
    BANNER_TMP="${TMPDIR:-/tmp}/.harness_banner.$$"
    {
        printf '%s\n\n' "$EP001_BANNER"
        cat "$BUILD_LOG"
    } > "$BANNER_TMP"
    mv "$BANNER_TMP" "$BUILD_LOG"
fi

# Codex spec-review gate. It is invoked unconditionally; the function itself
# decides when to skip (for example it reports SKIPPED in dry-run).
run_codex_spec_review_gate

# Dispatch-audit gate runs BEFORE the dry-run early-exit so that the documented
# zero-spend preflight (--dry-run [--no-codex-spec-review]) catches the SAME
# fail-closed condition the real build would: a dispatch-touching spec missing its
# mandatory final audit phase is refused (exit 1) in dry-run too, not reported as
# "DRY-RUN COMPLETE". The gate is local parsing only (no model dispatch), so it is
# safe in dry-run; its build-log write is suppressed when no log exists yet.
run_dispatch_audit_gate

# ── DRY-RUN MODE ─────────────────────────────────────────────────

# Dry-run: report what was parsed for every phase (dependencies, validation
# command, logged status) plus a short preview of the pre-flight extraction,
# then exit 0 without dispatching anything.
if $DRY_RUN; then
    printf '\n%s\n\n' "========== DRY-RUN: Parse Results =========="
    for phase in "${PHASE_IDS[@]}"; do
        phase_name="$(get_phase_name "$phase")"
        printf '%s\n' "--- Phase $phase: $phase_name ---"

        # MUST_FIX #2: Parse dependencies using numeric token extraction,
        # stripping bold markdown and parentheticals. Also expands the textual
        # "all preceding phases" form via resolve_deps. The one-per-line output
        # is flattened to a single space-separated line for display.
        dep_list="$(resolve_deps "$phase" | tr '\n' ' ' | sed -E 's/[[:space:]]+$//')"
        printf '%s\n' "  Dependencies: ${dep_list:-none}"

        val_cmd="$(get_validation_cmd "$phase")"
        if [ -n "$val_cmd" ]; then
            printf '%s\n' "  Validation:"
            # Shown indented by four spaces, with EP001 rewritten to E-P-001.
            printf '%s\n' "$val_cmd" | sed -e 's/EP001/E-P-001/g' -e 's/^/    /'
        else
            printf '%s\n' "  Validation: (none)"
        fi

        status="$(get_phase_status "$phase")"
        printf '%s\n\n' "  Current status: $status"
    done

    preflight="$(get_preflight_extraction)"
    if [ -n "$preflight" ]; then
        # Preview only: the first five lines followed by an ellipsis marker.
        printf '%s\n' "--- Pre-flight Extraction ---"
        printf '%s\n' "$preflight" | head -5
        printf '%s\n' "  ..."
    fi

    printf '%s\n' "Convergence: SKIPPED (dry-run)" "========== DRY-RUN COMPLETE =========="
    exit 0
fi

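# ── INITIALIZE BUILD LOG ──────────────────────────────────────────
# (Nothing to do here: the build log is created and bannered in the
# PRE-BUILD GATES section above, before the gates run.)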

# ── MAIN ORCHESTRATION LOOP ───────────────────────────────────────

# Outside dispatch mode (RUN_DIR empty) the only exit-time work is removing
# scratch files, so register cleanup directly as the EXIT handler.
[ -n "$RUN_DIR" ] || trap cleanup EXIT

while true; do
    made_progress=false
    all_done=true
    any_blocked=false

    for phase in "${PHASE_IDS[@]}"; do
        phase_name="$(get_phase_name "$phase")"
        status="$(get_phase_status "$phase")"

        if [ "$status" = "PASS" ]; then
            continue
        fi

        all_done=false

        if [ "$status" = "BLOCKED" ] || [ "$status" = "FAIL" ]; then
            any_blocked=true
            continue
        fi

        # SHOULD_FIX #10: If phase was IN_PROGRESS (killed mid-run),
        # check if validation already passes before re-dispatching.
        if [ "$status" = "IN_PROGRESS" ]; then
            val_cmd="$(get_validation_cmd "$phase")"
            if [ -n "$val_cmd" ]; then
                CURRENT_PHASE="phase-$phase"
                LAST_GATE="validation"
                LAST_VALIDATION_COMMAND="$val_cmd"
                dispatch_heartbeat_hint "$CURRENT_PHASE"
                printf '#!/usr/bin/env bash\n%s\n' "$val_cmd" > "$VAL_SCRIPT"
                chmod +x "$VAL_SCRIPT"
                run_with_timeout "$VALIDATION_TIMEOUT" bash "$VAL_SCRIPT"
                resume_exit=$?
                if [ $resume_exit -eq 0 ]; then
                    echo ">> Phase $phase was IN_PROGRESS but validation passes. Marking PASS."
                    # Commit any uncommitted work
                    git -C "$WORKING_DIR" add -A
                    git -C "$WORKING_DIR" reset -q -- "$BUILD_LOG" 2>/dev/null
                    # The phase commit is the durable resume/push boundary (parity
                    # with the normal phase path). If there ARE staged changes but
                    # the commit FAILS, the validated work is not committed — fail
                    # the phase CLOSED instead of logging PASS over an uncommitted
                    # result that a crash/push would lose.
                    if ! git -C "$WORKING_DIR" diff --cached --quiet 2>/dev/null; then
                        if ! git -C "$WORKING_DIR" commit -q -m "harness: Phase $phase PASS — $phase_name (resumed)"; then
                            echo ">> ERROR: Phase $phase (resumed) validated but the commit FAILED — failing the phase (no durable commit)." >&2
                            LAST_GATE="commit"
                            LAST_VALIDATION_COMMAND=""
                            record_failure_reason 1 "$TMP_OUT" "Phase $phase resumed validation passed but commit failed"
                            log_status "$phase" "$phase_name" "FAIL"
                            made_progress=true
                            break
                        fi
                    fi
                    log_status "$phase" "$phase_name" "PASS"
                    notify_phase_progress "$phase" "$phase_name"
                    made_progress=true
                    break  # Re-evaluate from top
                fi
            fi
            echo ">> Phase $phase was IN_PROGRESS — re-dispatching."
        fi

        # ── Check dependencies ──
        # MUST_FIX #2: Extract numeric tokens after stripping bold markdown
        # and parentheticals. Handles "**depends_on:** Phase 2",
        # "depends_on: 1, 2", "none", "nothing". resolve_deps also expands the
        # "all preceding phases" form so a mandatory final audit waits for every
        # prior phase to PASS.
        dep_list="$(resolve_deps "$phase")"
        deps_met=true
        for dep in $dep_list; do
            dep_status="$(get_phase_status "$dep")"
            if [ "$dep_status" != "PASS" ]; then
                deps_met=false
                if [ "$dep_status" = "FAIL" ] || [ "$dep_status" = "BLOCKED" ]; then
                    CURRENT_PHASE="phase-$phase"
                    LAST_GATE="dependency"
                    LAST_VALIDATION_COMMAND=""
                    printf 'Dependency Phase %s status is %s; Phase %s cannot run.\n' "$dep" "$dep_status" "$phase" > "$TMP_OUT"
                    record_failure_reason 1 "$TMP_OUT" "dependency Phase $dep is $dep_status"
                    log_status "$phase" "$phase_name" "BLOCKED"
                    made_progress=true
                fi
                break
            fi
        done

        if ! $deps_met; then
            continue
        fi

        # ── EXECUTE PHASE ─────────────────────────────────────────
        echo "========================================"
        echo "Executing Phase $phase: $phase_name"
        echo "========================================"
        CURRENT_PHASE="phase-$phase"
        LAST_GATE="dispatch"
        LAST_VALIDATION_COMMAND=""
        dispatch_heartbeat_hint "$CURRENT_PHASE"
        phase_engine="$(get_phase_engine "$phase")" || exit $?
        echo ">> Engine: $phase_engine"
        log_status "$phase" "$phase_name" "IN_PROGRESS"

        # SHOULD_FIX #6: Build completed phases summary correctly.
        completed_phases="$(get_completed_phases_summary)"
        preflight="$(get_preflight_extraction)"
        phase_content="$(get_phase_content "$phase")"
        val_cmd="$(get_validation_cmd "$phase")"

        # Build the phase prompt.
        # SHOULD_FIX #7: Don't tell Claude to commit — orchestrator handles commits
        # after validation passes and quality review completes.
        cat <<PROMPT_EOF > "$TMP_PROMPT"
You are executing Phase $phase of a multi-phase build spec.

WORKING DIRECTORY: $WORKING_DIR
BUILD SPEC: $SPEC_FILE
WHAT ALREADY EXISTS (Completed Phases): $completed_phases

$([ -n "$preflight" ] && printf 'PRE-FLIGHT EXTRACTION:\n%s\n' "$preflight")

PHASE SPECIFICATION:
$phase_content

INSTRUCTIONS:
1. Read the BUILD SPEC at the path above for full context.
2. Implement the exact requirements for Phase $phase. Do not implement any other phase.
3. If exact implementation code blocks (--detail max) are provided, use them exactly. If behavioral specs (--detail high) are provided, implement the behavior.
4. Strictly respect the "Scope boundary" defined in the spec.
5. Run the validation command embedded in the phase spec to confirm your work passes.
6. Do NOT commit — the orchestrator handles git commits after validation.
PROMPT_EOF
        if [ "$phase_engine" = "codex" ]; then
            append_codex_footer "$TMP_PROMPT"
        fi

        # Phase log file for transcript capture.
        # SHOULD_FIX #8: Capture all Claude output instead of discarding.
        PHASE_LOG="$SPEC_DIR/phase-${phase}-claude.log"

        # Snapshot the tree BEFORE dispatch. A bare "is the tree clean after?"
        # check is fail-OPEN on an already-dirty tree (pre-existing changes look
        # like the engine touched files even if a failed dispatch touched nothing).
        # Comparing the before/after fingerprint detects a true no-op regardless.
        pre_dispatch_fp="$(tree_fingerprint)"

        # 1. Run Implementation
        echo ">> Dispatching $phase_engine for implementation..."
        build_prompt="$(cat "$TMP_PROMPT")"
        dispatch_phase "$build_prompt" "$phase_engine" > "$PHASE_LOG" 2>&1
        dispatch_rc=$?

        # Did the dispatch change anything? Compare the post-dispatch tree
        # fingerprint against the pre-dispatch snapshot (the fingerprint already
        # excludes the harness's OWN artifacts — build log, phase transcripts,
        # review log, stale archives). Equal fingerprints => the dispatch made no
        # code changes, even if the tree was already dirty going in.
        post_dispatch_fp="$(tree_fingerprint)"
        no_changes=false
        if [ "$pre_dispatch_fp" = "$post_dispatch_fp" ]; then
            no_changes=true
        fi

        # A FAILED dispatch (engine missing / unauthenticated / rate-limited) that
        # also made NO changes means the phase was never implemented — fail it HARD
        # instead of letting a trivial/absent validation mark it PASS.
        if [ "$dispatch_rc" -ne 0 ] && $no_changes; then
            echo ">> ERROR: $phase_engine dispatch FAILED (exit $dispatch_rc) and made no changes — phase not implemented (auth / rate-limit / missing engine?)." >&2
            echo ">> See: $PHASE_LOG" >&2
            LAST_GATE="dispatch"
            LAST_VALIDATION_COMMAND=""
            record_failure_reason "$dispatch_rc" "$PHASE_LOG" "$phase_engine dispatch failed and made no changes"
            log_status "$phase" "$phase_name" "FAIL"
            made_progress=true
            break
        fi

        # Dispatch returned 0 but did nothing — surface a likely auth/usage issue.
        if $no_changes; then
            log_size="$(wc -c < "$PHASE_LOG" 2>/dev/null | tr -d ' ')"
            if [ "${log_size:-0}" -lt 100 ]; then
                echo ">> WARNING: $phase_engine produced no file changes and minimal output. Possible auth/usage error."
                echo ">> See: $PHASE_LOG"
            fi
        fi

        # 2. Validation & Retry Loop
        # Runs the phase's validation command up to MAX_RETRIES + 1 times. After
        # every failed run except the last, a debug pass is dispatched with the
        # failing command plus the last 50 lines of $TMP_OUT, then validation is
        # re-run. On exit phase_passed is true iff a run exited 0, and val_exit
        # holds the status of the most recent run.
        attempt=0
        phase_passed=false

        while [ "$attempt" -le "$MAX_RETRIES" ]; do
            echo ">> Running validation (Attempt $((attempt + 1))/$((MAX_RETRIES + 1)))..."
            LAST_GATE="validation"
            LAST_VALIDATION_COMMAND="$val_cmd"

            if [ -z "$val_cmd" ]; then
                # Don't SILENTLY green a phase with no runnable gate — warn loudly
                # so an unverified phase is visible in the dispatch output/logs.
                echo ">> WARNING: Phase $phase has NO parseable validation command — passing UNVERIFIED." >&2
                printf 'WARNING: no validation command parsed for phase %s — passed UNVERIFIED.\n' "$phase" > "$TMP_OUT"
                val_exit=0
            else
                # MUST_FIX #4: Run under bash explicitly with proper shebang.
                # NICE_TO_HAVE #14: Use printf instead of echo for val_cmd.
                printf '#!/usr/bin/env bash\n%s\n' "$val_cmd" > "$VAL_SCRIPT"
                chmod +x "$VAL_SCRIPT"
                run_with_timeout "$VALIDATION_TIMEOUT" bash "$VAL_SCRIPT"
                val_exit=$?
            fi

            if [ "$val_exit" -eq 0 ]; then
                phase_passed=true
                break
            fi

            # Failed run: count it, and stop without a debug pass once the retry
            # budget is spent.
            attempt=$((attempt + 1))
            if [ "$attempt" -gt "$MAX_RETRIES" ]; then break; fi

            # The first debug pass uses the phase's own engine; from the second
            # one on, always use claude.
            debug_engine="$phase_engine"
            if [ "$attempt" -ge 2 ]; then
                debug_engine="claude"
                if [ "$phase_engine" != "claude" ]; then
                    echo ">> Debug retry $attempt escalating to claude for decorrelation."
                fi
            fi
            echo ">> Validation failed (exit $val_exit). Dispatching $debug_engine to debug..."
            err_output="$(tail -n 50 "$TMP_OUT" 2>/dev/null || true)"

            # SHOULD_FIX #9: Use heredoc for debug prompt instead of
            # literal \n in a variable assignment.
            # Create the prompt file with mktemp (unpredictable name, created
            # exclusively) rather than a guessable PID-based path in the shared
            # temp dir; fall back to the PID-based name only if mktemp fails.
            DEBUG_PROMPT_FILE="$(mktemp "${TMPDIR:-/tmp}/.harness_debug_prompt.XXXXXX" 2>/dev/null)" \
                || DEBUG_PROMPT_FILE="${TMPDIR:-/tmp}/.harness_debug_prompt.$$"
            cat <<DEBUG_EOF > "$DEBUG_PROMPT_FILE"
Phase $phase validation failed with exit code $val_exit.

WORKING DIRECTORY: $WORKING_DIR
BUILD SPEC: $SPEC_FILE

VALIDATION COMMAND:
$val_cmd

OUTPUT (last 50 lines):
$err_output

Debug and fix the issue. Read the BUILD SPEC for the full Phase $phase specification.
Stay within the scope of Phase $phase. Do NOT commit — the orchestrator handles commits.
DEBUG_EOF
            if [ "$debug_engine" = "codex" ]; then
                append_codex_footer "$DEBUG_PROMPT_FILE"
            fi
            debug_prompt="$(cat "$DEBUG_PROMPT_FILE")"
            # A failed debug dispatch is tolerated: the next validation run is
            # what decides whether the phase passes.
            dispatch_phase "$debug_prompt" "$debug_engine" >> "$PHASE_LOG" 2>&1 || true
            rm -f "$DEBUG_PROMPT_FILE"
        done

        # 3. Post-Phase Actions
        # On a validated phase: stage everything except the build/phase logs,
        # commit, and record PASS. On exhausted retries: record FAIL. Both
        # outcomes set made_progress and break so the caller re-evaluates.
        if $phase_passed; then
            echo ">> Phase $phase PASS!"

            # SHOULD_FIX #7: Commit BEFORE quality review so review sees the diff.
            # MUST_FIX #5: Use git -C, add -A, exclude the build log.
            #
            # The phase commit is the durable resume/push boundary. If staging or
            # the commit FAILS (pre-commit hook, index lock, unreadable file, or
            # any git error), the validated work is not committed — fail the phase
            # CLOSED instead of logging PASS over an uncommitted result that would
            # later print BUILD COMPLETE / be pushed without the changes.
            # `git add -A` must be checked explicitly: when it fails (e.g. on an
            # index lock) nothing is staged, `diff --cached --quiet` then reports
            # a clean index, and the commit attempt would be skipped entirely.
            phase_commit_ok=true
            if git -C "$WORKING_DIR" add -A; then
                git -C "$WORKING_DIR" reset -q -- "$BUILD_LOG" 2>/dev/null
                # Also exclude phase logs from the commit
                for plog in "$SPEC_DIR"/phase-*-claude.log; do
                    [ -f "$plog" ] && git -C "$WORKING_DIR" reset -q -- "$plog" 2>/dev/null
                done
                # Only commit when something is actually staged.
                if ! git -C "$WORKING_DIR" diff --cached --quiet 2>/dev/null; then
                    if ! git -C "$WORKING_DIR" commit -q -m "harness: Phase $phase PASS — $phase_name"; then
                        echo ">> ERROR: Phase $phase validated but the commit FAILED — failing the phase (no durable commit)." >&2
                        phase_commit_ok=false
                    fi
                fi
            else
                echo ">> ERROR: Phase $phase validated but staging (git add -A) FAILED — failing the phase (no durable commit)." >&2
                phase_commit_ok=false
            fi

            if ! $phase_commit_ok; then
                echo ">> See: $PHASE_LOG" >&2
                LAST_GATE="commit"
                LAST_VALIDATION_COMMAND=""
                record_failure_reason 1 "$PHASE_LOG" "Phase $phase validated but commit failed"
                log_status "$phase" "$phase_name" "FAIL"
                made_progress=true
                break
            fi

            log_status "$phase" "$phase_name" "PASS"
            notify_phase_progress "$phase" "$phase_name"
            made_progress=true
            break  # Re-evaluate dependency graph from the top
        else
            echo ">> Phase $phase FAIL! Max retries exhausted."
            echo ">> See: $PHASE_LOG"
            LAST_GATE="validation"
            LAST_VALIDATION_COMMAND="$val_cmd"
            record_failure_reason "${val_exit:-1}" "$TMP_OUT" "Phase $phase validation retries exhausted"
            log_status "$phase" "$phase_name" "FAIL"
            made_progress=true
            break
        fi
    done

    # Leave the outer loop once every phase is done, or when a full pass made
    # no progress at all.
    # NOTE: the BUILD COMPLETE marker / SUCCESS message is deliberately NOT
    # emitted here — the end-of-build code review further down may regress
    # validation and flip all_done=false. The marker is written only AFTER that
    # re-validation passes, so a monitor keying off "## BUILD COMPLETE" can
    # never see green on a build the review later failed.
    $all_done && break

    if ! $made_progress; then
        printf '\n## BUILD BLOCKED\n**Blocked at:** %s\n' "$(date)" >> "$BUILD_LOG"
        # Prefer an already-recorded terminal failure reason; otherwise append a
        # generic "no progress" reason built from $TMP_OUT.
        if [ -z "${TERMINAL_FAILURE_REASON:-}" ]; then
            append_failure_reason_to_log 1 "$TMP_OUT" "no progress could be made"
        else
            printf '**Failure reason:**\n```text\n%s\n```\n' "$TERMINAL_FAILURE_REASON" >> "$BUILD_LOG"
        fi
        printf '\n%s\n%s\n' \
            "FAILURE: Build Blocked. No progress could be made." \
            "Build log: $BUILD_LOG"
        # List every phase that did not reach PASS, with its current status.
        for phase in "${PHASE_IDS[@]}"; do
            status="$(get_phase_status "$phase")"
            [ "$status" = "PASS" ] && continue
            phase_name="$(get_phase_name "$phase")"
            echo "  Phase $phase ($phase_name): $status"
        done
        break
    fi
done

# Default convergence outcome, exported for child processes; the end-of-build
# review below overwrites it when it actually runs.
CONVERGE_STATUS="SKIPPED"
export CONVERGE_STATUS

# End-of-build convergence review (bounded review/fix loop across all changes)
if $all_done && ! $DRY_RUN; then
    echo ""
    echo "Running end-of-build convergence review..."

    # Default CONVERGE_MAX_ROUNDS to 6 only when it is UNSET; an explicitly
    # empty value is kept so the validation below can reject it.
    : "${CONVERGE_MAX_ROUNDS=6}"

    # Classify the value: empty / non-digit input violates "a positive
    # integer"; an all-digit value below 1 violates ">= 1". An empty rule
    # means the setting is valid.
    converge_cfg_rule=""
    case "$CONVERGE_MAX_ROUNDS" in
        ""|*[!0-9]*)
            converge_cfg_rule="a positive integer"
            ;;
        *)
            if [ "$CONVERGE_MAX_ROUNDS" -lt 1 ]; then
                converge_cfg_rule=">= 1"
            fi
            ;;
    esac

    # Invalid setting: report on stderr and in the build log, record the
    # failure context, and take the build out of the green path.
    if [ -n "$converge_cfg_rule" ]; then
        converge_cfg_shown="${CONVERGE_MAX_ROUNDS:-<empty>}"
        echo ">> ERROR: CONVERGE_MAX_ROUNDS must be $converge_cfg_rule (got '$converge_cfg_shown')." >&2
        printf '\n## BUILD FAILED\nCONVERGE_MAX_ROUNDS must be %s (got `%s`).\n' "$converge_cfg_rule" "$converge_cfg_shown" >> "$BUILD_LOG"
        CURRENT_PHASE="end-of-build-convergence"
        LAST_GATE="convergence-config"
        LAST_VALIDATION_COMMAND=""
        printf 'CONVERGE_MAX_ROUNDS must be %s; got %s.\n' "$converge_cfg_rule" "$converge_cfg_shown" > "$TMP_OUT"
        append_failure_reason_to_log 1 "$TMP_OUT" "invalid CONVERGE_MAX_ROUNDS"
        all_done=false
    fi

    if $all_done; then
        CODEX="$(resolve_codex_binary)" || CODEX=""
        if [ -z "$CODEX" ]; then
            # No codex binary was resolved: record the FALLBACK outcome on stdout
            # and in the build log, then run the legacy mandatory single-pass
            # Claude review instead.
            export CONVERGE_STATUS="FALLBACK"
            eob_fallback_notice="Convergence: codex not found — fell back to single-pass Claude review"
            printf '%s\n' "$eob_fallback_notice"
            printf '\n%s\n' "$eob_fallback_notice" >> "$BUILD_LOG"

            CURRENT_PHASE="end-of-build-convergence"
            LAST_GATE="code-review"
            LAST_VALIDATION_COMMAND=""
            dispatch_heartbeat_hint "$CURRENT_PHASE"

            # All review output (stdout + stderr) is captured in the review log.
            eob_review_log="${SPEC_DIR}/end-of-build-review.log"
            eob_review_prompt="$(build_claude_convergence_review_prompt "$SPEC_FILE")"
            dispatch_phase "$eob_review_prompt" "claude" > "$eob_review_log" 2>&1
            rev_dispatch_exit=$?

            # The review is mandatory: a dispatch that did not run cleanly fails
            # the build rather than letting it go green unreviewed.
            if [ "$rev_dispatch_exit" -ne 0 ]; then
                echo ">> ERROR: end-of-build code review failed to run (exit $rev_dispatch_exit) — not marking build green." >&2
                printf '\n## BUILD FAILED\nMandatory end-of-build code review did not run (dispatch exit %s). See end-of-build-review.log.\n' "$rev_dispatch_exit" >> "$BUILD_LOG"
                append_failure_reason_to_log "$rev_dispatch_exit" "$eob_review_log" "mandatory end-of-build code review did not run"
                all_done=false
            fi

            # Stage whatever the fallback review changed. Staging must succeed:
            # if `git add -A` fails (index lock, unreadable file, ...) nothing is
            # staged, the "anything staged?" check below would see a clean index,
            # and the build would go green with the review's edits neither
            # re-validated nor committed. Fail closed instead.
            if $all_done && ! git -C "$WORKING_DIR" add -A; then
                echo ">> ERROR: end-of-build review-fix staging (git add -A) FAILED — not marking build green, fixes left uncommitted." >&2
                printf '\n## BUILD FAILED\nEnd-of-build review-fix staging failed (git add -A); fixes left uncommitted.\n' >> "$BUILD_LOG"
                LAST_GATE="commit"
                LAST_VALIDATION_COMMAND=""
                append_failure_reason_to_log 1 "${SPEC_DIR}/end-of-build-review.log" "end-of-build review-fix staging failed"
                all_done=false
            fi

            if $all_done; then
                # Keep the build log and the phase/review logs out of the commit.
                git -C "$WORKING_DIR" reset -q -- "$BUILD_LOG" 2>/dev/null
                for plog in "$SPEC_DIR"/phase-*-claude.log "$SPEC_DIR/end-of-build-review.log"; do
                    [ -f "$plog" ] && git -C "$WORKING_DIR" reset -q -- "$plog" 2>/dev/null
                done
                # Only re-validate and commit when the review actually changed
                # something.
                if ! git -C "$WORKING_DIR" diff --cached --quiet 2>/dev/null; then
                    # Global gate: the first ```bash fence after a
                    # "## Validation command" heading in the spec.
                    global_val="$(awk '
                        /^## Validation command/ {flag=1; next}
                        flag && /^```bash/ {code=1; next}
                        code && /^```/ {exit}
                        code {print}
                    ' "$SPEC_FILE")"
                    review_ok=true
                    if [ -n "$global_val" ]; then
                        LAST_GATE="global-validation"
                        LAST_VALIDATION_COMMAND="$global_val"
                        printf '#!/usr/bin/env bash\n%s\n' "$global_val" > "$VAL_SCRIPT"
                        chmod +x "$VAL_SCRIPT"
                        run_with_timeout "$VALIDATION_TIMEOUT" bash "$VAL_SCRIPT"
                        rev_exit=$?
                        if [ $rev_exit -ne 0 ]; then
                            echo ">> ERROR: end-of-build code-review fix REGRESSED the global validation — not marking build green, NOT committing the fix." >&2
                            printf '\n## BUILD FAILED\nEnd-of-build code-review fix regressed the global validation (re-run exit %s). Fix left staged/uncommitted for inspection.\n' "$rev_exit" >> "$BUILD_LOG"
                            append_failure_reason_to_log "$rev_exit" "$TMP_OUT" "end-of-build code-review fix regressed global validation"
                            all_done=false
                            review_ok=false
                        fi
                    else
                        # No global gate: re-run every phase's own validation
                        # command and stop at the first regression.
                        for rp in "${PHASE_IDS[@]}"; do
                            rp_val="$(get_validation_cmd "$rp")"
                            [ -z "$rp_val" ] && continue
                            CURRENT_PHASE="phase-$rp"
                            LAST_GATE="validation"
                            LAST_VALIDATION_COMMAND="$rp_val"
                            printf '#!/usr/bin/env bash\n%s\n' "$rp_val" > "$VAL_SCRIPT"
                            chmod +x "$VAL_SCRIPT"
                            run_with_timeout "$VALIDATION_TIMEOUT" bash "$VAL_SCRIPT"
                            rp_exit=$?
                            if [ $rp_exit -ne 0 ]; then
                                echo ">> ERROR: end-of-build code-review fix REGRESSED Phase $rp validation — not marking build green, NOT committing the fix." >&2
                                printf '\n## BUILD FAILED\nEnd-of-build code-review fix regressed Phase %s validation. Fix left staged/uncommitted for inspection.\n' "$rp" >> "$BUILD_LOG"
                                append_failure_reason_to_log "$rp_exit" "$TMP_OUT" "end-of-build code-review fix regressed Phase $rp validation"
                                all_done=false
                                review_ok=false
                                break
                            fi
                        done
                    fi
                    if $review_ok; then
                        # Gates still pass: the commit must succeed too, or the
                        # build is not durable and must not go green.
                        if ! git -C "$WORKING_DIR" commit -q -m "harness: end-of-build code review fixes"; then
                            echo ">> ERROR: end-of-build review-fix commit FAILED — not marking build green, fixes left staged." >&2
                            printf '\n## BUILD FAILED\nEnd-of-build review-fix commit failed (hook or index-lock); fixes left staged/uncommitted.\n' >> "$BUILD_LOG"
                            LAST_GATE="commit"
                            LAST_VALIDATION_COMMAND=""
                            append_failure_reason_to_log 1 "${SPEC_DIR}/end-of-build-review.log" "end-of-build review-fix commit failed"
                            all_done=false
                        fi
                    else
                        echo ">> Regressing review-fix left STAGED (uncommitted) — not committed, not pushed." >&2
                    fi
                fi
            fi
        else
            # State for the bounded review/fix rounds.
            convergence_round=1
            convergence_done=false
            # A review/fix pass is a full codex run, not a validation script, so
            # it gets its own generous budget (default 1800s, overridable via
            # the environment) instead of VALIDATION_TIMEOUT — a validation-sized
            # cap would kill a legitimate review mid-pass and fail the build
            # closed.
            : "${CONVERGE_REVIEW_TIMEOUT:=1800}"
            # Concatenate every validation command the spec defines: first the
            # global "## Validation command" bash fence, then each phase's own
            # command, each followed by a newline.
            validation_refs="$(
                {
                    awk '
                        /^## Validation command/ {in_section=1; next}
                        in_section && /^```bash/ {in_fence=1; next}
                        in_fence && /^```/ {exit}
                        in_fence {print}
                    ' "$SPEC_FILE"
                    for rp in "${PHASE_IDS[@]}"; do
                        get_validation_cmd "$rp"
                        printf '\n'
                    done
                }
            )"
            # harness_gate_path_tokens: filter stdin (validation command text)
            # down to the unique path-looking tokens it mentions.
            #   Globals: WORKING_DIR, REPO_ROOT (read; REPO_ROOT may be unset)
            #   Input:   command text on stdin
            #   Output:  one token per line, sorted and de-duplicated
            # Steps: split on whitespace, trim leading/trailing shell punctuation,
            # drop option flags (-x), make paths repo-relative by stripping a
            # leading $WORKING_DIR/, $REPO_ROOT/ or ./, and keep only tokens that
            # contain a "/" or a ".".
            # The prefix is quoted inside ${token#...} so it is removed as a
            # LITERAL string; unquoted it is a glob pattern, and a directory name
            # containing [, * or ? would fail to strip (leaving an absolute path
            # that can never match a repo-relative changed file).
            harness_gate_path_tokens() {
                tr '[:space:]' '\n' \
                    | sed -E "s/^[\`\"'(){}<>]+//; s/[\`\"'(){}<>,;:|&]+$//" \
                    | while IFS= read -r token; do
                        [ -z "$token" ] && continue
                        if [[ "$token" == -* ]]; then
                            continue
                        elif [[ "$token" == "$WORKING_DIR"/* ]]; then
                            token=${token#"$WORKING_DIR"/}
                        elif [[ -n "${REPO_ROOT:-}" && "$token" == "$REPO_ROOT"/* ]]; then
                            token=${token#"$REPO_ROOT"/}
                        elif [[ "$token" == ./* ]]; then
                            token=${token#./}
                        fi
                        if [[ "$token" == */* || "$token" == *.* ]]; then
                            printf '%s\n' "$token"
                        fi
                    done \
                    | sort -u
            }
            # Files referenced by any validation command; compared against each
            # convergence round's changed files by the frozen-gate guard.
            frozen_validation_files="$(printf '%s\n' "$validation_refs" | harness_gate_path_tokens)"

            # Non-test deliverables this spec AUTHORIZES the build to create/modify
            # (the "Files to create/modify" sections). A deliverable that ALSO appears
            # in a validation command — e.g. a wrapper script the gate runs `bash -n`
            # on, or a tool the gate greps — must stay FIXABLE during convergence;
            # otherwise any tool/script deliverable that is syntax-checked by its own
            # gate can never be corrected and a legitimately-converged build aborts
            # (REC-86, 2026-06-07: convergence hardened linear_drain_headless.py, a
            # Phase-3 deliverable, and the frozen-gate guard killed the build).
            # Test files are deliberately NOT excluded here: they remain frozen via the
            # test-file regex below, preserving the don't-weaken-tests invariant.
            #
            # harness_deliverable_paths: filter stdin down to the unique, non-test,
            # repo-relative file paths it mentions.
            #   Globals: WORKING_DIR (read)
            #   Input:   free text on stdin
            #   Output:  one path per line, sorted and de-duplicated
            # The $WORKING_DIR/ and ./ prefixes are stripped with literal shell
            # prefix removal rather than by interpolating $WORKING_DIR into a sed
            # regex: a "#" (the sed delimiter) or an unbalanced "(" / "[" in the
            # directory name made sed fail outright, which emptied the whole list
            # and silently re-froze every declared deliverable.
            harness_deliverable_paths() {
                grep -oE '[A-Za-z0-9_./-]+\.[A-Za-z0-9]+' \
                    | while IFS= read -r deliverable; do
                        deliverable=${deliverable#"$WORKING_DIR"/}
                        deliverable=${deliverable#./}
                        printf '%s\n' "$deliverable"
                    done \
                    | grep -Ev '(^|/)tests?/|(^|/)test_|_test\.|conftest\.py|\.feature$' \
                    | sort -u
            }
            deliverable_files="$(dispatch_target_lines | harness_deliverable_paths)"

            while [ "$convergence_round" -le "$CONVERGE_MAX_ROUNDS" ]; do
                # One convergence round, part 1: build the review prompt, run codex
                # on it, log the output, and extract the verdict. Any unclean run
                # (non-zero exit or no verdict) fails the build and leaves the loop.
                CURRENT_PHASE="end-of-build-convergence"
                LAST_GATE="convergence-review"
                LAST_VALIDATION_COMMAND=""
                dispatch_heartbeat_hint "$CURRENT_PHASE" "$convergence_round"
                dispatch_budget_add_round
                echo ">> Convergence Round $convergence_round/$CONVERGE_MAX_ROUNDS"
                # HEAD before the round, so the round's edits can be diffed later.
                round_start_ref="$(git -C "$WORKING_DIR" rev-parse HEAD)"
                # Create the prompt file with mktemp (unpredictable name, created
                # exclusively) rather than a guessable PID-based path in the shared
                # temp dir; fall back to the PID-based name only if mktemp fails.
                CONVERGENCE_PROMPT_FILE="$(mktemp "${TMPDIR:-/tmp}/.harness_convergence_prompt.XXXXXX" 2>/dev/null)" \
                    || CONVERGENCE_PROMPT_FILE="${TMPDIR:-/tmp}/.harness_convergence_prompt.$$"
                # The heredoc delimiter is UNQUOTED on purpose (the $vars and the
                # $(...) contract block must expand), which means a bare backtick
                # starts a command substitution. Every literal backtick in the
                # prompt text is therefore escaped as \` — unescaped, the words
                # between backticks (return, pass, except, conftest.py) were run as
                # commands and dropped from the prompt, and the output of any such
                # command found on PATH was injected into it.
                cat <<CONVERGENCE_EOF > "$CONVERGENCE_PROMPT_FILE"
You are executing an end-of-build convergence review/fix pass for this harness build.

WORKING DIRECTORY: $WORKING_DIR
BUILD SPEC: $SPEC_FILE
ROUND: $convergence_round of $CONVERGE_MAX_ROUNDS

$(build_convergence_review_contract_block "$SPEC_FILE")

Review this build's changes, including the diff versus the base branch and any current working-tree changes, for correctness, safety, regressions, and spec compliance.

FIX every CRITICAL or MAJOR finding by editing source. Also FIX every finding touching a validation gate, safety gate, acceptance assertion, or build-safety behavior.

FROZEN-gate discipline: NEVER modify, weaken, disable, delete, skip, rename, remove, OR CREATE/ADD any validation command, test fixture, acceptance assertion, safety/validation-gate file, or test file. This includes ADDING a missing test or assertion: the frozen-gate guard blocks ALL convergence edits to gate/test files (additions included), by design — there is no safe automated way to tell a strengthening addition from an additive weakening (e.g. an inserted early \`return\`/\`pass\`, a broad \`except\`, or a \`conftest.py\` collection hook). If a gate test is MISSING, WRONG, WEAK, or a finding can only be resolved by changing a gate, do NOT touch it — leave it UNFIXED and report it as MAJOR (gate-bound) so it is authored in-band on the next build.

Leave MINOR findings unfixed.

Emit findings as lines beginning exactly with CRITICAL:, MAJOR:, or MINOR:. End with a FINAL line exactly one of:
VERDICT: CONVERGED
VERDICT: NEEDS-FIXES

Use VERDICT: CONVERGED only when no CRITICAL or MAJOR findings remain and no gate-bound finding remains. Otherwise use VERDICT: NEEDS-FIXES.

Do NOT commit. The orchestrator will revalidate and commit only after deterministic gate guards pass.
CONVERGENCE_EOF
                # stdin is /dev/null so the review cannot block on input.
                run_with_timeout "$CONVERGE_REVIEW_TIMEOUT" "$CODEX" exec \
                    --skip-git-repo-check \
                    -s workspace-write \
                    -c approval_policy=never \
                    -c model=gpt-5.5 \
                    -c model_reasoning_effort=high \
                    -C "$WORKING_DIR" \
                    "$(cat "$CONVERGENCE_PROMPT_FILE")" </dev/null
                conv_exit=$?
                rm -f "$CONVERGENCE_PROMPT_FILE"
                # The round's output is read back from $TMP_OUT and archived in
                # the build log.
                conv_output="$(cat "$TMP_OUT" 2>/dev/null || true)"
                printf '\n## Convergence Round %s\n%s\n' "$convergence_round" "$conv_output" >> "$BUILD_LOG"

                # grep -o extracts the bare verdict token wherever it appears on a
                # line, so indentation, trailing spaces or other decoration around
                # it are tolerated while the later exact match on the token still
                # works. When several tokens appear, the LAST one in the output
                # wins.
                verdict="$(printf '%s\n' "$conv_output" | grep -oE 'VERDICT: (CONVERGED|NEEDS-FIXES)' | tail -n 1 || true)"
                if [ "$conv_exit" -ne 0 ] || [ -z "$verdict" ]; then
                    echo ">> ERROR: convergence review did not run cleanly in round $convergence_round — not marking build green." >&2
                    printf '\n## BUILD FAILED — convergence review did not run (round %s)\nCodex exit: %s. Verdict: %s. Partial edits left uncommitted for inspection.\n' "$convergence_round" "$conv_exit" "${verdict:-<none>}" >> "$BUILD_LOG"
                    append_failure_reason_to_log "$conv_exit" "$TMP_OUT" "convergence review round $convergence_round did not run cleanly"
                    all_done=false
                    break
                fi

                # Frozen-gate guard: collect every path this round touched (tracked
                # changes since the round's starting commit plus untracked,
                # non-ignored files) and fail the build if any of them is a test
                # file or a file referenced by a validation command.
                # core.quotePath=false makes git print non-ASCII paths verbatim.
                # With git's default, such a path is emitted C-quoted (wrapped in
                # double quotes with octal escapes), which matches neither the
                # test-file regex nor the exact-match list below — so e.g. a new
                # test file with an accented name would slip past this guard.
                round_changed_files="$(
                    {
                        git -C "$WORKING_DIR" -c core.quotePath=false diff --name-only "$round_start_ref" -- 2>/dev/null
                        git -C "$WORKING_DIR" -c core.quotePath=false ls-files --others --exclude-standard 2>/dev/null
                    } | sort -u
                )"
                frozen_touched=""
                # Iterate via read (not unquoted word-splitting) so a changed path
                # containing whitespace can't be split into tokens that slip past
                # both the test-file regex and the frozen-file exact match.
                while IFS= read -r changed_file; do
                    [ -z "$changed_file" ] && continue
                    # Anything that looks like a test file is always frozen.
                    if printf '%s\n' "$changed_file" | grep -qE '(^|/)tests?/|(^|/)test_|_test\.|conftest\.py|\.feature$'; then
                        frozen_touched="${frozen_touched}${changed_file} "
                        continue
                    fi
                    # A file referenced by a validation command is frozen unless it
                    # is also a declared non-test deliverable.
                    if [ -n "$frozen_validation_files" ] && printf '%s\n' "$frozen_validation_files" | grep -Fxq "$changed_file"; then
                        if [ -n "$deliverable_files" ] && printf '%s\n' "$deliverable_files" | grep -Fxq "$changed_file"; then
                            : # declared non-test deliverable — fixable during convergence
                        else
                            frozen_touched="${frozen_touched}${changed_file} "
                        fi
                    fi
                done <<< "$round_changed_files"
                if [ -n "$frozen_touched" ]; then
                    echo ">> ERROR: convergence round $convergence_round modified a frozen gate: $frozen_touched" >&2
                    printf '\n## BUILD FAILED — convergence round %s modified a frozen gate: %s\nEdits left uncommitted for inspection.\nFIX: this gate/test was authored or changed by convergence, which is blocked by design. If the test is the contract, author it in a named implementation PHASE (so it lands in-band) and re-dispatch; do not let convergence backfill gate tests.\n' "$convergence_round" "$frozen_touched" >> "$BUILD_LOG"
                    printf 'Convergence round %s modified frozen gate files: %s\n' "$convergence_round" "$frozen_touched" > "$TMP_OUT"
                    append_failure_reason_to_log 1 "$TMP_OUT" "convergence modified a frozen gate"
                    all_done=false
                    break
                fi

                # Re-run the spec's gates against this round's edits BEFORE anything
                # is committed. The global gate is the first ```bash fence after a
                # "## Validation command" heading; when the spec has none, every
                # phase's own validation command is run instead.
                global_val="$(awk '
                    /^## Validation command/ {in_section=1; next}
                    in_section && /^```bash/ {in_fence=1; next}
                    in_fence && /^```/ {exit}
                    in_fence {print}
                ' "$SPEC_FILE")"
                review_ok=true
                if [ -z "$global_val" ]; then
                    # Per-phase gates: stop at the first one that regresses.
                    for rp in "${PHASE_IDS[@]}"; do
                        rp_val="$(get_validation_cmd "$rp")"
                        if [ -z "$rp_val" ]; then
                            continue
                        fi
                        CURRENT_PHASE="phase-$rp"
                        LAST_GATE="validation"
                        LAST_VALIDATION_COMMAND="$rp_val"
                        printf '#!/usr/bin/env bash\n%s\n' "$rp_val" > "$VAL_SCRIPT"
                        chmod +x "$VAL_SCRIPT"
                        run_with_timeout "$VALIDATION_TIMEOUT" bash "$VAL_SCRIPT"
                        rp_exit=$?
                        [ "$rp_exit" -eq 0 ] && continue
                        echo ">> ERROR: convergence round $convergence_round fix REGRESSED Phase $rp validation — not marking build green, NOT committing the fix." >&2
                        printf '\n## BUILD FAILED\nConvergence round %s fix regressed Phase %s validation. Fix left uncommitted for inspection.\n' "$convergence_round" "$rp" >> "$BUILD_LOG"
                        append_failure_reason_to_log "$rp_exit" "$TMP_OUT" "convergence round $convergence_round regressed Phase $rp validation"
                        all_done=false
                        review_ok=false
                        break
                    done
                else
                    # Single global gate.
                    LAST_GATE="global-validation"
                    LAST_VALIDATION_COMMAND="$global_val"
                    printf '#!/usr/bin/env bash\n%s\n' "$global_val" > "$VAL_SCRIPT"
                    chmod +x "$VAL_SCRIPT"
                    run_with_timeout "$VALIDATION_TIMEOUT" bash "$VAL_SCRIPT"
                    rev_exit=$?
                    if [ "$rev_exit" -ne 0 ]; then
                        echo ">> ERROR: convergence round $convergence_round fix REGRESSED the global validation — not marking build green, NOT committing the fix." >&2
                        printf '\n## BUILD FAILED\nConvergence round %s fix regressed the global validation (re-run exit %s). Fix left uncommitted for inspection.\n' "$convergence_round" "$rev_exit" >> "$BUILD_LOG"
                        append_failure_reason_to_log "$rev_exit" "$TMP_OUT" "convergence round $convergence_round regressed global validation"
                        all_done=false
                        review_ok=false
                    fi
                fi
                # A regression ends the convergence loop with the edits uncommitted.
                $review_ok || {
                    echo ">> Regressing convergence fix left uncommitted — not committed, not pushed." >&2
                    break
                }

                # Commit this round's (re-validated) edits. Staging must succeed:
                # if `git add -A` fails (index lock, unreadable file, ...) nothing
                # is staged, the "anything staged?" check below would see a clean
                # index, and a CONVERGED verdict would mark the build green with
                # the round's fixes uncommitted. Fail closed instead.
                if ! git -C "$WORKING_DIR" add -A; then
                    echo ">> ERROR: convergence round $convergence_round staging (git add -A) FAILED — not marking build green, fixes left uncommitted." >&2
                    printf '\n## BUILD FAILED\nConvergence round %s staging failed (git add -A); fixes left uncommitted.\n' "$convergence_round" >> "$BUILD_LOG"
                    LAST_GATE="commit"
                    LAST_VALIDATION_COMMAND=""
                    append_failure_reason_to_log 1 "$TMP_OUT" "convergence round $convergence_round staging failed"
                    all_done=false
                    break
                fi
                # Keep the build log and the phase/review logs out of the commit.
                git -C "$WORKING_DIR" reset -q -- "$BUILD_LOG" 2>/dev/null
                for plog in "$SPEC_DIR"/phase-*-claude.log "$SPEC_DIR/end-of-build-review.log"; do
                    [ -f "$plog" ] && git -C "$WORKING_DIR" reset -q -- "$plog" 2>/dev/null
                done
                # Only commit when the round actually staged something.
                if ! git -C "$WORKING_DIR" diff --cached --quiet 2>/dev/null; then
                    if ! git -C "$WORKING_DIR" commit -q -m "harness: convergence round $convergence_round fixes"; then
                        echo ">> ERROR: convergence round $convergence_round commit FAILED — not marking build green, fixes left staged." >&2
                        printf '\n## BUILD FAILED\nConvergence round %s commit failed (hook or index-lock); fixes left staged/uncommitted.\n' "$convergence_round" >> "$BUILD_LOG"
                        LAST_GATE="commit"
                        LAST_VALIDATION_COMMAND=""
                        append_failure_reason_to_log 1 "$TMP_OUT" "convergence round $convergence_round commit failed"
                        all_done=false
                        break
                    fi
                fi

                # Act on the round's verdict: CONVERGED records the outcome and
                # ends the loop; NEEDS-FIXES moves on to the next round.
                if [ "$verdict" = "VERDICT: CONVERGED" ]; then
                    export CONVERGE_STATUS="CONVERGED"
                    convergence_done=true
                    printf '\n## CONVERGED — HUMAN-VERIFY REQUIRED\n' >> "$BUILD_LOG"
                    echo "## CONVERGED — HUMAN-VERIFY REQUIRED"
                    break
                elif [ "$verdict" = "VERDICT: NEEDS-FIXES" ]; then
                    convergence_round=$((convergence_round + 1))
                fi
            done

            # The loop ended without a failure but also without a CONVERGED
            # verdict: the round cap was hit. Record CAPPED, take the build off
            # the green path, and log the failure reason.
            if $all_done && ! $convergence_done; then
                export CONVERGE_STATUS="CAPPED"
                all_done=false
                capped_banner="## CAPPED — NOT CONVERGED ($CONVERGE_MAX_ROUNDS rounds) — build NOT green; human must resolve"
                printf '\n%s\n' "$capped_banner" >> "$BUILD_LOG"
                CURRENT_PHASE="end-of-build-convergence"
                LAST_GATE="convergence-review"
                LAST_VALIDATION_COMMAND=""
                record_failure_reason 1 "$TMP_OUT" "convergence review capped after $CONVERGE_MAX_ROUNDS rounds"
                printf '**Failure reason:**\n```text\n%s\n```\n' "$TERMINAL_FAILURE_REASON" >> "$BUILD_LOG"
                printf '%s\n' "$capped_banner" >&2
            fi
        fi
    fi
fi

# The completion marker and SUCCESS message are emitted only here, after the
# end-of-build review and its re-validation. If that review regressed a gate,
# all_done is false by now and "## BUILD FAILED" is already in the log, so the
# build must NOT be marked green.
if $all_done; then
    printf '\n## BUILD COMPLETE\n**Finished:** %s\n' "$(date)" >> "$BUILD_LOG"
    printf '\n%s\n%s\n' "SUCCESS: Build Complete!" "Build log: $BUILD_LOG"
fi

# Push to GitHub so the other machine can pull. The CURRENT branch is pushed —
# phase commits land on whatever branch the harness ran on, not necessarily
# main. A failed push is only a warning: the commits stay local.
printf '\n%s\n' "Pushing to GitHub..."
push_branch="$(git -C "$WORKING_DIR" rev-parse --abbrev-ref HEAD 2>/dev/null)"
push_failed_note="WARNING: git push failed — commits are local only"
case "$push_branch" in
    ""|HEAD)
        # No branch name could be resolved (or HEAD is detached): nothing to push to.
        echo "WARNING: detached HEAD — skipping push (commits are local only)."
        ;;
    main)
        git -C "$WORKING_DIR" push origin "$push_branch" 2>&1 || echo "$push_failed_note"
        ;;
    *)
        # Non-main branch: if it already exists on the remote, overwrite it only
        # when the remote still points at the SHA just observed
        # (--force-with-lease); otherwise do a plain first push.
        remote_sha="$(git -C "$WORKING_DIR" ls-remote origin "refs/heads/$push_branch" 2>/dev/null | awk '{print $1}')"
        if [ -z "$remote_sha" ]; then
            git -C "$WORKING_DIR" push origin "HEAD:refs/heads/$push_branch" 2>&1 || echo "$push_failed_note"
        else
            git -C "$WORKING_DIR" push --force-with-lease="refs/heads/$push_branch:$remote_sha" origin "HEAD:refs/heads/$push_branch" 2>&1 || echo "$push_failed_note"
        fi
        ;;
esac

# ── Final Step (A): auto-open the build PR ──
# REC-74 / REC-27: on a SUCCESSFUL build, open the PR for the build branch,
# carrying the session_workspace handoff-metadata block. Runs for BOTH the
# foreground /harness path and the detached /dispatch path. Every step is
# non-fatal: a missing gh, an existing PR, or a failed create never flips a green
# build red (the branch is already pushed).
if $all_done && ! $DRY_RUN \
   && [ -n "$push_branch" ] && [ "$push_branch" != "HEAD" ] && [ "$push_branch" != "main" ]; then
    # REC dashboard fix: FRESH var assigned ONLY in the two real PR-resolution
    # branches below — never inherits the env-init $PR_URL on a non-resolved
    # sub-path (the CRITICAL stale-env failure mode).
    DASH_PR_URL=""
    # Resolve (or create) the PR for $push_branch with the GitHub CLI. Every
    # outcome below only prints a message; nothing here changes all_done.
    if command -v gh >/dev/null 2>&1; then
        # Only bother with a PR when the branch has commits that origin/main lacks.
        if [ -n "$(git -C "$WORKING_DIR" log "origin/main..$push_branch" --oneline 2>/dev/null)" ]; then
            should_create_pr=false
            pr_create_reason=""
            pr_view_json=""
            # A successful `gh pr view` means a PR already exists for the branch;
            # ANY failure of that call is treated as "no existing PR".
            if pr_view_json="$(cd "$WORKING_DIR" && gh pr view "$push_branch" --json number,state 2>/dev/null)"; then
                # Pull state/number out of the JSON with python3; a parse failure
                # leaves the value empty and lands in the "*)" arm below.
                pr_state="$(printf '%s\n' "$pr_view_json" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("state") or "")' 2>/dev/null || true)"
                pr_number="$(printf '%s\n' "$pr_view_json" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("number") or "")' 2>/dev/null || true)"
                case "$pr_state" in
                    OPEN)
                        # Reuse the open PR: fetch its URL and skip creation.
                        PR_URL="$(cd "$WORKING_DIR" && gh pr view "$push_branch" --json url -q .url 2>/dev/null || true)"
                        DASH_PR_URL="$PR_URL"
                        if [ -n "$PR_URL" ]; then
                            echo "Final-Step-A: PR already open for $push_branch${pr_number:+ (#$pr_number)} — skipping create."
                        else
                            echo "WARNING: Final-Step-A found an open PR for $push_branch but could not read its URL — branch is pushed; open the PR manually."
                        fi
                        ;;
                    CLOSED|MERGED)
                        # The previous PR is finished; open a new one for this build.
                        should_create_pr=true
                        pr_create_reason="existing PR${pr_number:+ #$pr_number} is $pr_state"
                        ;;
                    *)
                        # Unknown/empty state: neither reuse nor create.
                        echo "WARNING: Final-Step-A could not determine PR state for $push_branch — branch is pushed; open the PR manually."
                        ;;
                esac
            else
                should_create_pr=true
                pr_create_reason="no existing PR found"
            fi

            if $should_create_pr; then
                [ -n "$pr_create_reason" ] && echo "Final-Step-A: $pr_create_reason for $push_branch — creating PR."
                # Derive actor + issue from the session_workspace branch convention
                # (<actor>/REC-NN-<slug>); fall back to the build coder / null.
                case "$push_branch" in
                    claude/*) pr_actor="claude" ;;
                    codex/*)  pr_actor="codex" ;;
                    *)        pr_actor="$DEFAULT_CODER" ;;
                esac
                # First REC-<digits> token in the branch name, or the literal "null".
                pr_issue="$(printf '%s\n' "$push_branch" | grep -oE 'REC-[0-9]+' | head -1)"
                [ -n "$pr_issue" ] || pr_issue="null"
                # Optional metadata block from session_workspace.sh; left empty when
                # the script is not executable or its pr-metadata call fails.
                sw="$SCRIPT_DIR/session_workspace.sh"
                pr_meta=""
                [ -x "$sw" ] && pr_meta="$("$sw" pr-metadata --issue "$pr_issue" --actor "$pr_actor" --worktree "$WORKING_DIR" 2>/dev/null || true)"
                pr_title="harness: $(basename "$SPEC_FILE" .md) ($push_branch)"
                pr_body="$(printf 'Automated harness build (Final-Step-A).\n\nSpec: %s\n\nConvergence: %s\n\n%s\n' "$(basename "$SPEC_FILE")" "$CONVERGE_STATUS" "$pr_meta")"
                # gh's combined stdout+stderr is captured so it can be echoed either
                # way and, on success, scanned for the new PR's URL.
                if pr_create_output="$(cd "$WORKING_DIR" && gh pr create --base main --head "$push_branch" --title "$pr_title" --body "$pr_body" 2>&1)"; then
                    printf '%s\n' "$pr_create_output"
                    # Last https://.../pull/<n> URL in the output, if any.
                    PR_URL="$(printf '%s\n' "$pr_create_output" | grep -Eo 'https://[^[:space:]]+/pull/[0-9]+' | tail -n 1 || true)"
                    DASH_PR_URL="$PR_URL"
                    echo "Final-Step-A: PR opened for $push_branch."
                else
                    printf '%s\n' "$pr_create_output"
                    echo "WARNING: Final-Step-A gh pr create failed — branch is pushed; open the PR manually."
                fi
            fi
        else
            echo "Final-Step-A: no commits ahead of origin/main on $push_branch — skipping PR."
        fi
    else
        echo "Final-Step-A: gh not found — skipping auto-PR (branch pushed; open PR manually)."
    fi
    # REC dashboard fix: stamp pr_url into status.json at the moment it is known.
    # pr_url-ONLY write (set-pr-url) — does NOT transition state, so the reaper keeps
    # terminal ownership (records --commit, fires the converged notify, reaps the
    # worktree). Keyed off DASH_PR_URL (a FRESH var from THIS run's gh resolution, not
    # the env-init $PR_URL). Dispatch mode only (RUN_DIR empty on foreground/local).
    # Non-fatal: never flips a green build red.
    if [ -n "$RUN_DIR" ] && [ -n "$DASH_PR_URL" ]; then
      if ! python3 "$SCRIPT_DIR/dispatch_status.py" set-pr-url \
            --run-dir "$RUN_DIR" --pr-url "$DASH_PR_URL" >/dev/null 2>&1; then
        # Non-fatal (never flips a green build red) BUT operator-VISIBLE: a swallowed
        # stamp failure would silently re-introduce the exact pr_url:null lie this build
        # kills, so warn loudly to the build log. (MAJOR — do not fail silently.)
        echo "WARNING: Final-Step-A: dispatch_status.py set-pr-url FAILED for $RUN_DIR (pr_url=$DASH_PR_URL) — status.json.pr_url may read null; the dashboard BUILDING/DONE axis could be wrong for this run." >&2
      fi
    fi
fi

# ── Final Step (B): auto-land the build PR after independent verification ──
# Every sub-step is non-fatal. If any eligibility check fails, the already-open
# PR stays open for review and the green build remains green.
#
# Gates run in order; the first one that fails records a token in
# $autoland_reason and every later gate is skipped:
#   1. branch is claude/* or codex/* and CONVERGE_STATUS is CONVERGED
#   2. `git fetch origin` succeeds
#   3. the PR reports mergeable=MERGEABLE and mergeStateStatus=CLEAN
#      (polled up to 10 times, 3s apart)
#   4. the PR head SHA is readable and resolves to a local commit
#   5. autoland_diff_within_allowlist accepts the changed paths
#      (rc 2 = the spec declares no usable ALLOWED regex)
#   6. the PR tip does not contain .session-lease.json
#   7. codex_review_gate.sh exits 0 for that exact head SHA
#   8. the PR head has not moved since review (plus a lease re-check)
#   9. gh squash-merges the PR pinned to the reviewed SHA

# Print top-level field $1 of the JSON object read from stdin.
# Yields an empty string (and still returns 0) when stdin is not valid JSON,
# the field is missing or falsy, or python3 cannot be run.
autoland_json_field() {
    python3 -c 'import json,sys; print((json.load(sys.stdin).get(sys.argv[1]) or ""))' "$1" 2>/dev/null || true
}

# Squash-merge the PR for branch $1, pinned to head commit $2, running gh
# from directory $3. Returns 0 when the PR ends up merged, 1 otherwise.
#
# gh's combined output is printed whenever gh exits non-zero so a failed merge
# is never silent. `gh pr merge --delete-branch` can exit non-zero after the
# remote merge has already happened (for example when the local branch cleanup
# step fails), so a failing call is cross-checked against the PR state before
# it is reported as "not merged".
autoland_merge_pr() {
    local branch="$1" head_sha="$2" dir="$3" merge_output pr_state
    # stdin from /dev/null so gh can never block on an interactive prompt.
    if merge_output="$(cd "$dir" && gh pr merge "$branch" --squash --delete-branch --match-head-commit "$head_sha" </dev/null 2>&1)"; then
        return 0
    fi
    if [ -n "$merge_output" ]; then
        printf '%s\n' "$merge_output"
    fi
    pr_state="$(cd "$dir" && gh pr view "$branch" --json state -q .state 2>/dev/null || true)"
    if [ "$pr_state" = "MERGED" ]; then
        echo "WARNING: Final-Step-B gh pr merge exited non-zero but the PR is MERGED — treating as merged (local branch cleanup may have failed)."
        return 0
    fi
    return 1
}

if $all_done && ! $DRY_RUN && [ "${DISPATCH_AUTO_MERGE:-1}" = "1" ] \
   && [ -n "$push_branch" ] && [ "$push_branch" != "HEAD" ] && [ "$push_branch" != "main" ] \
   && command -v gh >/dev/null 2>&1 \
   && ( cd "$WORKING_DIR" && gh pr view "$push_branch" >/dev/null 2>&1 ); then
    echo ""
    autoland_reason=""         # empty = still eligible; otherwise the skip token
    autoland_skip_message=""   # optional custom message; defaulted at the end

    # Gate 1: only claude/* and codex/* branches of a converged build qualify.
    case "$push_branch" in
        claude/*|codex/*) ;;
        *) autoland_reason="gates-failed" ;;
    esac

    if [ "${CONVERGE_STATUS:-}" != "CONVERGED" ]; then
        autoland_reason="gates-failed"
    fi

    # Gate 2: refresh origin so origin/main and the PR head are available locally.
    if [ -z "$autoland_reason" ]; then
        if ! git -C "$WORKING_DIR" fetch origin --quiet; then
            autoland_reason="origin-fetch-failed"
        fi
    fi

    # Gate 3: poll the PR until it is MERGEABLE/CLEAN. Stop early on
    # CONFLICTING, BLOCKED or DIRTY; give up after 10 polls.
    if [ -z "$autoland_reason" ]; then
        autoland_clean=false
        autoland_poll=1
        while [ "$autoland_poll" -le 10 ]; do
            autoland_view_json="$(cd "$WORKING_DIR" && gh pr view "$push_branch" --json mergeable,mergeStateStatus 2>/dev/null || true)"
            autoland_mergeable="$(printf '%s\n' "$autoland_view_json" | autoland_json_field mergeable)"
            autoland_state="$(printf '%s\n' "$autoland_view_json" | autoland_json_field mergeStateStatus)"
            if [ "$autoland_mergeable" = "MERGEABLE" ] && [ "$autoland_state" = "CLEAN" ]; then
                autoland_clean=true
                break
            fi
            case "$autoland_mergeable/$autoland_state" in
                CONFLICTING/*|*/CONFLICTING|*/BLOCKED|*/DIRTY)
                    break
                    ;;
            esac
            # No sleep after the final poll.
            [ "$autoland_poll" -lt 10 ] && sleep 3
            autoland_poll=$((autoland_poll + 1))
        done
        if ! $autoland_clean; then
            autoland_reason="not-clean"
        fi
    fi

    # Gate 4: pin the PR head SHA; every later gate works on this exact commit.
    if [ -z "$autoland_reason" ]; then
        autoland_head_json="$(cd "$WORKING_DIR" && gh pr view "$push_branch" --json headRefOid 2>/dev/null || true)"
        autoland_head_sha="$(printf '%s\n' "$autoland_head_json" | autoland_json_field headRefOid)"
        if [ -z "$autoland_head_sha" ]; then
            autoland_reason="not-clean"
        elif ! git -C "$WORKING_DIR" rev-parse --verify --quiet "$autoland_head_sha^{commit}" >/dev/null; then
            autoland_reason="not-clean"
        fi
    fi

    # Gate 5: the paths changed since the merge base with origin/main must be
    # accepted by the spec's allowlist. A failing diff is treated as exceeding it.
    if [ -z "$autoland_reason" ]; then
        if autoland_changed_paths="$(git -C "$WORKING_DIR" diff --name-only origin/main..."$autoland_head_sha" 2>/dev/null)"; then
            printf '%s\n' "$autoland_changed_paths" | autoland_diff_within_allowlist "$SPEC_FILE"
            autoland_allow_rc=$?
            if [ "$autoland_allow_rc" -eq 2 ]; then
                autoland_reason="no-allowlist-declared"
            elif [ "$autoland_allow_rc" -ne 0 ]; then
                autoland_reason="diff-exceeds-allowlist"
            fi
        else
            autoland_reason="diff-exceeds-allowlist"
        fi
    fi

    # Gate 6: refuse to land a tip that carries .session-lease.json.
    if [ -z "$autoland_reason" ]; then
        HEAD_TIP_LEASE="$(git -C "$WORKING_DIR" ls-tree -r --name-only "$autoland_head_sha" -- .session-lease.json 2>/dev/null)"
        if [ -n "$HEAD_TIP_LEASE" ]; then
            autoland_reason="leaked .session-lease.json in PR tip; strip the leaked lease first"
            autoland_skip_message="Final-Step-B: auto-land SKIPPED — leaked .session-lease.json in PR tip; strip the leaked lease first"
        fi
    fi

    # Gate 7: run the review gate script against the pinned SHA. Its output is
    # echoed; a missing script counts as rc 127 (not approved).
    if [ -z "$autoland_reason" ]; then
        autoland_review_gate="$SCRIPT_DIR/codex_review_gate.sh"
        if [ -f "$autoland_review_gate" ]; then
            autoland_review_output="$(
                cd "$WORKING_DIR" && bash "$autoland_review_gate" "$push_branch" --spec "$SPEC_FILE" --expected-head-oid "$autoland_head_sha" 2>&1
            )"
            autoland_review_rc=$?
            if [ -n "$autoland_review_output" ]; then
                printf '%s\n' "$autoland_review_output"
            fi
        else
            autoland_review_rc=127
            autoland_review_output="codex_review_gate.sh not found at $autoland_review_gate"
            printf '%s\n' "$autoland_review_output"
        fi
        if [ "$autoland_review_rc" -ne 0 ]; then
            autoland_reason="codex-review-not-approved"
            # Condense the last three gate lines (those starting with
            # "codex_review_gate:" or "/") into a one-line summary.
            autoland_review_summary="$(printf '%s\n' "$autoland_review_output" | grep -E '^(codex_review_gate:|/)' | tail -n 3 | tr '\n' ' ' | sed 's/[[:space:]]*$//')"
            if [ -n "$autoland_review_summary" ]; then
                autoland_skip_message="Final-Step-B: left open for review (codex-review-not-approved: $autoland_review_summary)"
            fi
        fi
    fi

    # Gate 8a: the PR head must still be the reviewed commit. An unreadable
    # head (empty SHA) also counts as "moved".
    if [ -z "$autoland_reason" ]; then
        autoland_premerge_json="$(cd "$WORKING_DIR" && gh pr view "$push_branch" --json headRefOid 2>/dev/null || true)"
        autoland_premerge_sha="$(printf '%s\n' "$autoland_premerge_json" | autoland_json_field headRefOid)"
        if [ "$autoland_premerge_sha" != "$autoland_head_sha" ]; then
            autoland_reason="head-moved-after-review"
            autoland_skip_message="Final-Step-B: auto-land SKIPPED — PR head moved after review; left open"
        fi
    fi

    # Gate 8b: lease re-check on the same pinned SHA immediately before merging.
    if [ -z "$autoland_reason" ]; then
        HEAD_TIP_LEASE="$(git -C "$WORKING_DIR" ls-tree -r --name-only "$autoland_head_sha" -- .session-lease.json 2>/dev/null)"
        if [ -n "$HEAD_TIP_LEASE" ]; then
            autoland_reason="leaked .session-lease.json in PR tip; strip the leaked lease first"
            autoland_skip_message="Final-Step-B: auto-land SKIPPED — leaked .session-lease.json in PR tip; strip the leaked lease first"
        fi
    fi

    # Gate 9: merge. autoland_merge_pr prints gh's output on failure and treats
    # a PR that is already MERGED as success.
    if [ -z "$autoland_reason" ]; then
        if autoland_merge_pr "$push_branch" "$autoland_head_sha" "$WORKING_DIR"; then
            echo "Final-Step-B: auto-merged $push_branch to main."
            printf '\n## AUTO-MERGED to main (Final-Step-B gates passed)\n' >> "$BUILD_LOG"
        else
            autoland_reason="not-clean"
        fi
    fi

    # Report a skip: best-effort notification (dispatch mode only), then stdout
    # and the build log.
    if [ -n "$autoland_reason" ]; then
        if [ -z "$autoland_skip_message" ]; then
            autoland_skip_message="Final-Step-B: left open for review ($autoland_reason)"
        fi
        if [ -n "${RUN_DIR:-}" ] && [ -f "$SCRIPT_DIR/dispatch_notify.sh" ]; then
            bash "$SCRIPT_DIR/dispatch_notify.sh" notify \
                --run-dir "$RUN_DIR" \
                --event AUTO_LAND_SKIPPED \
                --message "$autoland_skip_message" \
                --ntfy-only >/dev/null 2>&1 || true
        fi
        echo "$autoland_skip_message"
        printf '\n## AUTO-MERGE SKIPPED — PR left open for review (%s)\n' "$autoland_reason" >> "$BUILD_LOG"
        printf '%s\n' "$autoland_skip_message" >> "$BUILD_LOG"
    fi
fi

# Final exit status: a run that did not finish every phase must surface as a
# failure to dispatch/CI rather than being mistaken for a successful build.
if ! $all_done; then
    echo "FAILURE: build did not complete (blocked/failed). Exiting non-zero." >&2
    exit 1
fi
exit 0
