#!/usr/bin/env bash
# Self-contained Phase-3 test for the REC-229 pr_url-at-source stamp.
# (a) pr_url-only CLI contract  (b) STATIC orchestrator wiring asserts
# (c) end-to-end emission on self-built fixtures (NEVER sources the orchestrator).
set -euo pipefail

# Absolute directory of this script; the repository root sits three levels up.
HERE="$(dirname "$0")"
HERE="$(cd "$HERE" && pwd)"
ROOT="$(cd "${HERE}/../../.." && pwd)"

# The two tools exercised by this test.
STATUS="${ROOT}/pipeline/tools/dispatch_status.py"
ORCH="${ROOT}/pipeline/tools/harness_orchestrator.sh"

# Scratch directory for fixtures; removed on every exit path.
TMP="$(mktemp -d)"
trap 'rm -rf "$TMP"' EXIT

# Running tallies of assertion outcomes; printed in the final RESULT line.
PASS=0
FAIL=0

# ok LABEL — record one passing assertion and print "PASS: LABEL".
ok() {
  PASS=$((PASS + 1))
  printf 'PASS: %s\n' "$1"
}

# bad LABEL — record one failing assertion and print "FAIL: LABEL".
bad() {
  FAIL=$((FAIL + 1))
  printf 'FAIL: %s\n' "$1"
}

# jget FILE KEY — print the value stored under top-level KEY of the JSON
# object in FILE. A missing key or a JSON null prints as an empty line.
jget() {
  local json_file="$1" key="$2"
  python3 -c '
import json, sys

with open(sys.argv[1]) as fh:
    value = json.load(fh).get(sys.argv[2])
print("" if value is None else value)
' "$json_file" "$key"
}

# ───────────────────────── (a) pr_url-only CLI contract ─────────────────────────
# Build a fresh run directory through the CLI's `init` subcommand.
RUN="$TMP/run-a"
touch "$TMP/spec.md"
python3 "$STATUS" init \
  --run-dir "$RUN" \
  --issue REC-229 \
  --branch test \
  --worktree "$TMP" \
  --spec "$TMP/spec.md" \
  --last-validated-commit abc123 >/dev/null

# (a1) right after init: no pr_url recorded, state is STARTED.
if [ "$(jget "$RUN/status.json" pr_url)" = "" ]; then
  ok "a1: post-init pr_url null"
else
  bad "a1: post-init pr_url null"
fi
if [ "$(jget "$RUN/status.json" state)" = "STARTED" ]; then
  ok "a1: post-init state STARTED"
else
  bad "a1: post-init state STARTED"
fi

# Snapshot the dirty flag so (a3) can prove the stamp leaves it untouched.
DIRTY_BEFORE="$(jget "$RUN/status.json" linear_projection_dirty)"

python3 "$STATUS" set-pr-url \
  --run-dir "$RUN" \
  --pr-url "https://github.com/x/y/pull/123" >/dev/null

# (a3) the stamp writes pr_url and nothing else: state and dirty flag unchanged.
if [ "$(jget "$RUN/status.json" pr_url)" = "https://github.com/x/y/pull/123" ]; then
  ok "a3: pr_url stamped"
else
  bad "a3: pr_url stamped"
fi
if [ "$(jget "$RUN/status.json" state)" = "STARTED" ]; then
  ok "a3: state STILL STARTED (reaper keeps terminal ownership)"
else
  bad "a3: state unchanged"
fi
if [ "$(jget "$RUN/status.json" linear_projection_dirty)" = "$DIRTY_BEFORE" ]; then
  ok "a3: linear_projection_dirty unchanged"
else
  bad "a3: linear_projection_dirty unchanged"
fi

# (a4) stamp emits no Linear-projection event of its own
# Fails when the pr_url_stamped event is absent OR any linear_projection text
# shows up in the event log.
if ! grep -q 'pr_url_stamped' "$RUN/events.jsonl" || grep -q 'linear_projection' "$RUN/events.jsonl"; then
  bad "a4: stamp emits pr_url_stamped and NO linear_projection event"
else
  ok "a4: stamp emits pr_url_stamped and NO linear_projection event"
fi

# ───────────────────────── (b) STATIC wiring assertions ─────────────────────────
# Capture the Final-Step-A success block from the orchestrator source as text.
# Capture starts at the FIRST line that is exactly
#   if $all_done && ! $DRY_RUN \
# (the auto-PR block; the Final-Step-B block that follows opens with a
# single-line `if ... ;`, not the `\`-continued form, so it is not matched).
# From there a depth counter goes up on every line containing the word `if`
# and down on every line containing the word `fi`; capture stops on the line
# that brings the depth back to zero.
BLOCK="$(awk '
  !cap && /^if \$all_done && ! \$DRY_RUN \\$/ { cap = 1 }
  !cap { next }
  { print }
  /(^|[^[:alnum:]_])if([^[:alnum:]_]|$)/ { d++ }
  /(^|[^[:alnum:]_])fi([^[:alnum:]_]|$)/ { d-- }
  d == 0 && NR > 1 { exit }
' "$ORCH")"

# NOTE: every grep below reads $BLOCK through a here-string instead of
# `printf ... | grep -q`. This script runs with `set -o pipefail`; `grep -q`
# exits on its first match, and if the writer is still producing output it is
# killed by SIGPIPE, which makes the whole pipeline "fail". For the negative
# guard in b1 that would turn a real match into a false PASS. A here-string
# has no writer process, so grep's own status is the only status.

# b1: contains set-pr-url with --pr-url "$DASH_PR_URL"; does NOT transition CONVERGED
if grep -q 'dispatch_status.py set-pr-url' <<<"$BLOCK" \
   && grep -q -- '--pr-url "\$DASH_PR_URL"' <<<"$BLOCK"; then
  ok "b1: block calls set-pr-url --pr-url \$DASH_PR_URL"
else
  bad "b1: block calls set-pr-url --pr-url \$DASH_PR_URL"
fi
if grep -q 'transition.*CONVERGED_PR_CREATED' <<<"$BLOCK"; then
  bad "b1: block must NOT transition CONVERGED_PR_CREATED"
else
  ok "b1: block does not transition state (R14 guard)"
fi

# b2: guarded on BOTH [ -n "$RUN_DIR" ] and [ -n "$DASH_PR_URL" ]
if grep -q '\[ -n "\$RUN_DIR" \] && \[ -n "\$DASH_PR_URL" \]' <<<"$BLOCK"; then
  ok "b2: stamp guarded on RUN_DIR && DASH_PR_URL"
else
  bad "b2: stamp guarded on RUN_DIR && DASH_PR_URL"
fi

# b3: DASH_PR_URL="" at block top AND DASH_PR_URL="$PR_URL" in BOTH branches
# `grep -c` prints 0 and exits 1 when nothing matches; `|| true` keeps that
# from aborting the script under `set -e` so the count can be reported.
init_count="$(grep -c 'DASH_PR_URL=""' <<<"$BLOCK" || true)"
assign_count="$(grep -c 'DASH_PR_URL="\$PR_URL"' <<<"$BLOCK" || true)"
if [ "$init_count" -ge 1 ] && [ "$assign_count" -eq 2 ]; then
  ok "b3: DASH_PR_URL init once + assigned in BOTH resolution branches"
else
  bad "b3: DASH_PR_URL init=$init_count assigns=$assign_count (want init>=1, assigns=2)"
fi

# b4: NON-FATAL (no bare exit) BUT operator-VISIBLE (WARNING:) inside the stamp guard
# STAMP_GUARD is the slice of $BLOCK from the guard's `if` line through the
# first line consisting only of `fi`.
# Input is supplied via here-strings rather than `printf ... |`: this script
# runs with `set -euo pipefail`, and a consumer that stops reading early (awk's
# `exit`, `grep -q`) can SIGPIPE the writer. That would abort the script at the
# STAMP_GUARD assignment, or make the negated "no exit" grep pass spuriously.
STAMP_GUARD="$(awk '/if \[ -n "\$RUN_DIR" \] && \[ -n "\$DASH_PR_URL" \]/{g=1} g{print} g&&/^[[:space:]]*fi[[:space:]]*$/{exit}' <<<"$BLOCK")"
if grep -q 'set-pr-url' <<<"$STAMP_GUARD" \
   && grep -q 'WARNING' <<<"$STAMP_GUARD" \
   && ! grep -qE '(^|[^[:alnum:]_])exit([^[:alnum:]_]|$)' <<<"$STAMP_GUARD"; then
  ok "b4: stamp non-fatal (no bare exit) + WARNING on failure"
else
  bad "b4: stamp non-fatal + WARNING on failure"
fi

# b5: bash -n clean
if bash -n "$ORCH"; then
  ok "b5: bash -n clean"
else
  bad "b5: bash -n clean"
fi

# b6: set-pr-url appears exactly once in the block
# (`|| true`: grep -c exits 1 on a zero count, which must not trip `set -e`.)
spu_count="$(grep -c 'dispatch_status.py set-pr-url' <<<"$BLOCK" || true)"
if [ "$spu_count" -eq 1 ]; then
  ok "b6: set-pr-url exactly once in block"
else
  bad "b6: set-pr-url count=$spu_count (want 1)"
fi

# b7: the terminal-status emitter must read the stamped pr_url with NO env fallback.
#
# extract_emit FILE — print the source text of __emit_terminal_status() from
# FILE, starting at the line that begins with `__emit_terminal_status()` and
# ending at the function's closing `}` at column 0.
# The extraction is heredoc-aware: the function embeds a <<'PY' python heredoc
# whose dict close `}` at column 0 must NOT be mistaken for the function end,
# so column-0 `}` lines are ignored between the `<<…PY…` opener and the
# terminating `PY` line.
extract_emit() {
  local src="$1"
  awk '
    /^__emit_terminal_status\(\)/ { grab = 1 }
    !grab { next }
    { print }
    $0 ~ /<<.?PY.?$/ { heredoc = 1; next }
    heredoc { if ($0 ~ /^PY$/) heredoc = 0; next }
    $0 ~ /^}$/ { exit }
  ' "$src"
}
EMIT_BODY="$(extract_emit "$ORCH")"

# Every `pr_url=` assignment line inside the emitter body (may be empty).
# `|| true` is deliberate: with `set -euo pipefail`, a grep that finds nothing
# would otherwise abort the whole script right here — before b7.i can print
# its FAIL and before the RESULT line — which is exactly the regression this
# section is meant to report.
PR_ASSIGN_LINES="$(grep -E '^[[:space:]]*pr_url=' <<<"$EMIT_BODY" || true)"
# First such line only (everything up to the first newline).
PR_LINE="${PR_ASSIGN_LINES%%$'\n'*}"

# b7.i: the first pr_url assignment reads the stamped value via __status_field.
if grep -q 'pr_url="\$(__status_field pr_url)"' <<<"$PR_LINE"; then
  ok "b7.i: emit pr_url line is exactly \$(__status_field pr_url)"
else
  bad "b7.i: emit pr_url line is exactly \$(__status_field pr_url) — got: $PR_LINE"
fi

# b7.ii: the old env-first form must be gone from the whole orchestrator file.
if grep -q 'pr_url="\${PR_URL:-\$(__status_field pr_url)}"' "$ORCH"; then
  bad "b7.ii: old env-first pr_url resolution still present"
else
  ok "b7.ii: old env-first pr_url resolution removed"
fi

# b7.iii: no pr_url assignment in the emitter may mention $PR_URL / ${PR_URL.
# Here-string input keeps this negative check immune to pipefail/SIGPIPE
# artefacts that could otherwise turn a real match into a PASS.
if grep -qE '\$PR_URL|\$\{PR_URL' <<<"$PR_ASSIGN_LINES"; then
  bad "b7.iii: emit pr_url assignment references \$PR_URL"
else
  ok "b7.iii: emit pr_url assignment has no \$PR_URL reference"
fi

# ───────────────────────── (c) end-to-end emission (extracted emitter ONLY) ─────────────────────────
# Assemble a temp snippet holding just three function definitions copied out
# of the orchestrator — __attempt_dir, __status_field, __emit_terminal_status —
# so that only the snippet is sourced later and the orchestrator's top-level
# body NEVER runs.
SNIPPET="$TMP/emit_snippet.sh"

# 1) __attempt_dir: from its definition line through the first line that
#    starts with `}`.
awk '
  /^__attempt_dir\(\)/ { grab = 1 }
  grab { print }
  grab && /^}/ { exit }
' "$ORCH" > "$SNIPPET"

# 2) __status_field embeds a <<'PY' heredoc — extract heredoc-aware, i.e. a
#    column-0 `}` between the heredoc opener and the closing `PY` line does
#    not end the function.
awk '
  /^__status_field\(\)/ { grab = 1 }
  !grab { next }
  { print }
  $0 ~ /<<.?PY.?$/ { heredoc = 1; next }
  heredoc { if ($0 ~ /^PY$/) heredoc = 0; next }
  $0 ~ /^}$/ { exit }
' "$ORCH" >> "$SNIPPET"

# 3) __emit_terminal_status, via the shared extractor.
extract_emit "$ORCH" >> "$SNIPPET"

# run_emit RUN_DIR — source the extracted snippet and call
# `__emit_terminal_status 0` inside a subshell, so nothing set here leaks back
# into the test script. PR_URL is NOT set here: the caller controls it through
# its own environment. The emitter's stdout and stderr are discarded; the
# function's status is the emitter's status.
run_emit() {  # $1=run_dir  (PR_URL passed via env by caller)
  local run_dir="$1"
  (
    # The sourced code may read variables this harness never sets.
    set +u

    # Variables handed to the emitter (all exported).
    export RUN_DIR="$run_dir"
    export ATTEMPT=1
    export CONVERGE_STATUS="CONVERGED"
    export CURRENT_PHASE="phase-1"
    export LAST_GATE=""
    export LAST_VALIDATION_COMMAND=""
    export TERMINAL_CAUSE_HINT=""
    export TERMINAL_FAILURE_REASON=""
    export WORKING_DIR="$run_dir"
    export BUILD_LOG=""
    export TMP_OUT="$TMP/out.txt"
    export push_branch="test"

    # Make sure the file TMP_OUT points at exists before the emitter runs.
    touch "$TMP/out.txt"

    # shellcheck disable=SC1090
    source "$SNIPPET"
    __emit_terminal_status 0 >/dev/null 2>&1
  )
}

# NOTE on the emitter invocations below: the emitter's exit status is not what
# c1/c2 assert — the emitted terminal_status.json is. This script runs with
# `set -e`, so a bare `( ...; run_emit ... )` that returned non-zero would kill
# the script before the check could print FAIL and before the RESULT line.
# errexit is therefore suspended in the parent just long enough to collect the
# status, and re-enabled inside the subshell so the emitter itself still runs
# under the same `-e` setting as before.

# c1: stamp+emit propagation — stamped URL is emitted into terminal_status.json
RUNC1="$TMP/run-c1"
python3 "$STATUS" init --run-dir "$RUNC1" --issue REC-229 --branch test \
  --worktree "$RUNC1" --spec "$TMP/spec.md" --last-validated-commit abc >/dev/null
python3 "$STATUS" set-pr-url --run-dir "$RUNC1" --pr-url "https://github.com/x/y/pull/777" >/dev/null
set +e
( set -e; unset PR_URL; run_emit "$RUNC1" )
RC_C1=$?
set -e
TS_C1="$RUNC1/attempt-001/terminal_status.json"
if [ -f "$TS_C1" ] && [ "$(jget "$TS_C1" pr_url)" = "https://github.com/x/y/pull/777" ]; then
  ok "c1: stamped pr_url emitted into terminal_status.json"
else
  bad "c1: stamped pr_url emitted (got: $([ -f "$TS_C1" ] && jget "$TS_C1" pr_url || echo MISSING); emitter rc=$RC_C1)"
fi

# c2: NEGATIVE env-leak — no stamp, stale PR_URL in env, emitted pr_url must NOT be the garbage
RUNC2="$TMP/run-c2"
python3 "$STATUS" init --run-dir "$RUNC2" --issue REC-229 --branch test \
  --worktree "$RUNC2" --spec "$TMP/spec.md" --last-validated-commit abc >/dev/null
set +e
( set -e; export PR_URL="https://github.com/EVIL/GARBAGE/pull/666"; run_emit "$RUNC2" )
RC_C2=$?
set -e
TS_C2="$RUNC2/attempt-001/terminal_status.json"
EMITTED_C2="$([ -f "$TS_C2" ] && jget "$TS_C2" pr_url || echo MISSING)"
if [ -f "$TS_C2" ] && [ "$EMITTED_C2" != "https://github.com/EVIL/GARBAGE/pull/666" ]; then
  ok "c2: stale env PR_URL does NOT leak into terminal_status.json (got: '$EMITTED_C2')"
else
  bad "c2: stale env PR_URL leaked into terminal_status.json (got: '$EMITTED_C2'; emitter rc=$RC_C2)"
fi

# Summary line, then the script's exit status: zero only when nothing failed.
printf '\nRESULT: PASS=%d FAIL=%d\n' "$PASS" "$FAIL"
(( FAIL == 0 ))
