#!/usr/bin/env bash
# Focused behavioral tests for harness reliability fixes.
# Unset variables are errors and a pipeline fails if any stage fails.
# `-e` is not enabled, so a failing command does not abort the script; the
# tests below inspect exit statuses explicitly.
set -uo pipefail

# Absolute path of the directory containing this script.
HERE="$(cd "$(dirname "$0")" && pwd)"
# Scripts under test, expected in the parent directory of HERE.
ORCH="$HERE/../harness_orchestrator.sh"
STATUS="$HERE/../dispatch_status.py"
WS="$HERE/../session_workspace.sh"
# Test name selected with --only; empty means every test is run.
ONLY=""
# Running assertion totals, incremented by ok() and no() respectively.
PASS=0
FAIL=0

# Record a passing assertion.
# Arguments: $1 - human-readable description of what passed.
# Outputs:   one "  OK: ..." line on stdout.
# Globals:   PASS (incremented).
# Returns:   always 0, so it is safe as the middle of an `a && ok || no` chain.
ok() {
    printf '  OK: %s\n' "$1"
    : "$((PASS += 1))"
}

# Record a failing assertion.
# Arguments: $1 - human-readable description of what failed.
# Outputs:   one "  FAIL: ..." line on stdout.
# Globals:   FAIL (incremented).
# Returns:   always 0; failure is tracked through the FAIL counter only.
no() {
    printf '  FAIL: %s\n' "$1"
    : "$((FAIL += 1))"
}

# Test whether a haystack contains a literal (fixed-string) needle.
# Arguments:
#   $1 - haystack text (may span many lines).
#   $2 - needle, matched literally by `grep -F`; `--` protects a leading '-'.
# Returns:
#   0 when the needle occurs in the haystack, non-zero otherwise.
has_f() {
    # The haystack is supplied as a here-string rather than `printf ... |`.
    # With `set -o pipefail` active, `grep -q` exiting on its first match can
    # kill a still-writing producer with SIGPIPE when the haystack is large,
    # and the pipeline would then report failure despite the match. A
    # here-string has no producer process; like `printf '%s\n'`, it appends
    # a single trailing newline.
    grep -qF -- "$2" <<<"$1"
}

# Command-line parsing. The only accepted flag is `--only <test-name>`, which
# stores the name in ONLY; anything else (or a missing name) is a usage error
# reported on stderr with exit status 2.
while (( $# > 0 )); do
    if [[ "$1" != "--only" ]]; then
        echo "FATAL: unexpected argument '$1'" >&2
        exit 2
    fi
    if (( $# < 2 )); then
        echo "FATAL: --only requires a test name" >&2
        exit 2
    fi
    ONLY="$2"
    shift 2
done

# Preflight: each script under test must exist before any test runs.
# Diagnostics are written to stderr, like every other FATAL message in this
# file, so they are not mixed into captured or piped stdout.
test -f "$ORCH" || { echo "FATAL: orchestrator not found at $ORCH" >&2; exit 1; }
test -f "$STATUS" || { echo "FATAL: dispatch_status.py not found at $STATUS" >&2; exit 1; }
test -f "$WS" || { echo "FATAL: session_workspace not found at $WS" >&2; exit 1; }

# Run the orchestrator once against a throwaway git repository, with stub
# `claude` and `codex` executables placed first on PATH. The stubs record the
# prompts they are given so the caller can inspect them afterwards.
#
# Arguments:
#   $1 - mode. "codex" points HARNESS_CODEX_BIN at the codex stub; any other
#        value points it at "<bin>/not-a-codex", which is never created.
#   $2 - sandbox directory; all artifacts are created beneath it, prefixed
#        with the mode (e.g. "<sbx>/<mode>.codex.prompt").
# Outputs:
#   stdout - path of the generated BUILD_SPEC.md (successful runs only).
#   stderr - the captured orchestrator output when the run fails.
# Returns:
#   0 on success, otherwise the orchestrator's exit status.
run_orchestrator_with_stubs() {
    local mode="$1"
    local sbx="$2"
    local repo="$sbx/$mode-real"
    local stub_dir="$sbx/$mode-bin"
    local run_output="$sbx/$mode.out"
    local spec="$repo/BUILD_SPEC.md"
    local codex_capture="$sbx/$mode.codex.prompt"
    local claude_capture="$sbx/$mode.claude.prompts"
    local codex_bin

    # Minimal committed project: pyproject, README and the spec itself.
    mkdir -p "$repo" "$stub_dir"
    git -C "$repo" init -q
    git -C "$repo" config user.email "harness-reliability@example.invalid"
    git -C "$repo" config user.name "Harness Reliability"
    printf '[project]\nname = "harness-reliability"\nversion = "0.0.0"\n' > "$repo/pyproject.toml"
    printf 'initial\n' > "$repo/README.md"
    cat > "$spec" <<'SPEC'
# BUILD_SPEC - spec-aware convergence review test

## Phase 1: Contract marker
engine: claude
depends_on: none

### Requirements
- Implement SPEC_AWARE_REVIEW_UNIQUE_REQUIREMENT in the production diff.

### Validation
```bash
test -f README.md
```
SPEC
    git -C "$repo" add pyproject.toml README.md BUILD_SPEC.md
    git -C "$repo" commit -qm "init"

    # claude stub: appends the value following -p to $CLAUDE_CAPTURE.
    cat > "$stub_dir/claude" <<'STUB'
#!/usr/bin/env bash
prompt=""
while [ $# -gt 0 ]; do
    case "$1" in
        -p)
            shift
            prompt="${1:-}"
            ;;
    esac
    shift || true
done
{
    printf '%s\n' '---CLAUDE-PROMPT---'
    printf '%s\n' "$prompt"
} >> "$CLAUDE_CAPTURE"
printf 'stub claude invoked\n'
exit 0
STUB
    # codex stub: writes its last argument to $CODEX_CAPTURE and reports
    # convergence.
    cat > "$stub_dir/codex" <<'STUB'
#!/usr/bin/env bash
last=""
for arg in "$@"; do
    last="$arg"
done
printf '%s\n' "$last" > "$CODEX_CAPTURE"
printf 'VERDICT: CONVERGED\n'
exit 0
STUB
    chmod +x "$stub_dir/claude" "$stub_dir/codex"

    # The two modes differ only in which codex binary the harness is told
    # to use.
    if [ "$mode" = "codex" ]; then
        codex_bin="$stub_dir/codex"
    else
        codex_bin="$stub_dir/not-a-codex"
    fi

    PATH="$stub_dir:$PATH" \
    HARNESS_CODEX_BIN="$codex_bin" \
    CODEX_CAPTURE="$codex_capture" \
    CLAUDE_CAPTURE="$claude_capture" \
    DISPATCH_RUN_DIR="" \
    CONVERGE_MAX_ROUNDS=1 \
        bash -e "$ORCH" --coder claude --dir "$repo" --no-codex-spec-review "$spec" > "$run_output" 2>&1
    local rc=$?
    if (( rc != 0 )); then
        echo "Harness run failed for $mode (exit $rc). Output:" >&2
        cat "$run_output" >&2
        return "$rc"
    fi
    printf '%s\n' "$spec"
}

# Verify that review prompts are spec-aware in both review paths: the codex
# convergence prompt and the fallback prompt sent to claude. Each prompt must
# reference the BUILD_SPEC path, embed its content and carry the anti-mock
# directive; the fallback must also invoke `/code-review high --fix`.
# The sandbox is removed by an EXIT trap.
test_spec_aware_review() {
    local sandbox
    sandbox="$(mktemp -d)"
    trap "rm -rf '$sandbox'" EXIT

    local marker="SPEC_AWARE_REVIEW_UNIQUE_REQUIREMENT"
    local anti_mock="You MUST flag any test that mocks/stubs/fakes the exact thing under test"
    local spec_path prompt

    # --- codex convergence path ---
    if ! spec_path="$(run_orchestrator_with_stubs codex "$sandbox")"; then
        no "codex convergence prompt constructed"
        return
    fi
    prompt="$(cat "$sandbox/codex.codex.prompt" 2>/dev/null || true)"
    if has_f "$prompt" "Review contract file: $spec_path"; then
        ok "codex convergence prompt references the BUILD_SPEC path"
    else
        no "codex convergence prompt does not reference the BUILD_SPEC path"
    fi
    if has_f "$prompt" "$marker"; then
        ok "codex convergence prompt includes BUILD_SPEC content"
    else
        no "codex convergence prompt does not include BUILD_SPEC content"
    fi
    if has_f "$prompt" "$anti_mock"; then
        ok "codex convergence prompt contains anti-mock directive"
    else
        no "codex convergence prompt missing anti-mock directive"
    fi

    # --- fallback (claude) review path ---
    if ! spec_path="$(run_orchestrator_with_stubs fallback "$sandbox")"; then
        no "fallback review prompt constructed"
        return
    fi
    prompt="$(cat "$sandbox/fallback.claude.prompts" 2>/dev/null || true)"
    if has_f "$prompt" "/code-review high --fix"; then
        ok "fallback review invokes /code-review high --fix"
    else
        no "fallback review did not invoke /code-review high --fix"
    fi
    if has_f "$prompt" "Review contract file: $spec_path"; then
        ok "fallback review prompt references the BUILD_SPEC path"
    else
        no "fallback review prompt does not reference the BUILD_SPEC path"
    fi
    if has_f "$prompt" "$marker"; then
        ok "fallback review prompt includes BUILD_SPEC content"
    else
        no "fallback review prompt does not include BUILD_SPEC content"
    fi
    if has_f "$prompt" "$anti_mock"; then
        ok "fallback review prompt contains anti-mock directive"
    else
        no "fallback review prompt missing anti-mock directive"
    fi
}

# Phase 2: a blocked build has to explain itself. This test forces a genuine
# post-phase gate failure (the spec's validation block ends in `false`) while
# running in dispatch mode, and then checks the artifacts that were actually
# written — the build log, terminal_status.json and status.json — rather than
# grepping the orchestrator's source.
test_block_reason() {
    local sandbox
    sandbox="$(mktemp -d)"

    local repo="$sandbox/work"
    local stub_dir="$sandbox/bin"
    local run_dir="$sandbox/run"
    local attempt_dir="$run_dir/attempt-001"
    local build_log="$attempt_dir/build-log.md"
    local run_output="$sandbox/dispatch.out"
    local spec="$repo/BUILD_SPEC.md"

    mkdir -p "$repo" "$stub_dir" "$attempt_dir"
    git -C "$repo" init -q
    git -C "$repo" config user.email "harness-reliability@example.invalid"
    git -C "$repo" config user.name "Harness Reliability"
    printf '[project]\nname = "harness-reliability"\nversion = "0.0.0"\n' > "$repo/pyproject.toml"
    git -C "$repo" add pyproject.toml
    git -C "$repo" commit -qm "init"

    cat > "$spec" <<'SPEC'
# BUILD_SPEC - block-reason observability test

## Phase 1: Forced gate failure
engine: claude
depends_on: none

### Requirements
- Intentionally fail validation to exercise the block-reason path.

### Validation
```bash
echo "FAILED tests/test_block_reason.py::test_forced"
false
```
SPEC

    # The coder stub changes nothing, so the phase cannot ever pass and the
    # build ends up blocked once retries run out.
    cat > "$stub_dir/claude" <<'STUB'
#!/usr/bin/env bash
printf 'stub claude invoked\n'
exit 0
STUB
    chmod +x "$stub_dir/claude"

    local head_commit
    head_commit="$(git -C "$repo" rev-parse HEAD)"
    python3 "$STATUS" init \
        --run-dir "$run_dir" \
        --issue REC-BLOCK \
        --branch block-reason-test \
        --worktree "$repo" \
        --spec "$spec" \
        --last-validated-commit "$head_commit" >/dev/null

    PATH="$stub_dir:$PATH" \
    DISPATCH_RUN_DIR="$run_dir" \
    DISPATCH_ATTEMPT=1 \
        bash "$ORCH" --coder claude --dir "$repo" --log "$build_log" --no-codex-spec-review "$spec" > "$run_output" 2>&1
    local rc=$?

    if [ "$rc" -ne 0 ]; then
        ok "forced gate failure exits nonzero"
    else
        no "forced gate failure exited zero"
    fi

    # (a) Build log: must record the block plus a reason that names the gate
    #     and the exit code. A missing log is treated as empty text.
    local log_text=""
    if [ -f "$build_log" ]; then
        log_text="$(cat "$build_log")"
    fi
    if has_f "$log_text" "## BUILD BLOCKED"; then
        ok "build log records BUILD BLOCKED"
    else
        no "build log missing BUILD BLOCKED"
    fi
    if has_f "$log_text" "**Failure reason:**"; then
        ok "build log records a failure-reason block"
    else
        no "build log missing failure-reason block"
    fi
    if has_f "$log_text" "Gate/step: validation"; then
        ok "build log failure reason names the failing gate"
    else
        no "build log failure reason does not name the failing gate"
    fi
    if has_f "$log_text" "Exit code:"; then
        ok "build log failure reason records the exit code"
    else
        no "build log failure reason missing the exit code"
    fi

    if [ -f "$attempt_dir/terminal_status.json" ]; then
        ok "dispatch-mode terminal_status.json written"
    else
        no "dispatch-mode terminal_status.json missing"
    fi

    # (b) After a terminal transition, status.json must carry a non-null
    #     failure_reason that names the gate (so /unpause and the reaper can
    #     read WHY instead of None), together with last_gate.
    python3 "$STATUS" transition \
        --run-dir "$run_dir" \
        --state CAPPED_NEEDS_HUMAN >/dev/null 2>&1

    if python3 - "$run_dir/status.json" <<'PY'
import json
import sys

with open(sys.argv[1], "r", encoding="utf-8") as handle:
    data = json.load(handle)
reason = data.get("failure_reason")
assert reason, f"failure_reason is null/missing: {data!r}"
assert "validation" in reason, f"failure_reason does not name the gate: {reason!r}"
assert data.get("last_gate") == "validation", f"last_gate not persisted: {data.get('last_gate')!r}"
PY
    then
        ok "status.json has a non-null failure_reason naming the gate"
    else
        no "status.json failure_reason missing or does not name the gate"
    fi

    rm -rf "$sandbox"
}

# Run one auto-PR scenario: build a repo with a bare "origin", check out a
# feature branch, and run the orchestrator in dispatch mode with stub
# `claude`, `codex` and `gh` executables first on PATH.
#
# Arguments:
#   $1 - mode, exported to the gh stub as GH_MODE. The stub handles "closed"
#        and "open" (existing PR #36 CLOSED / #37 OPEN on `gh pr view`) and
#        "fail" (every gh call exits 1).
#   $2 - sandbox directory; artifacts are created beneath it, prefixed with
#        the mode. gh invocations are appended to "<sbx>/<mode>-gh.log".
# Outputs:
#   stdout - path of the attempt's terminal_status.json (successful runs only).
#   stderr - the captured orchestrator output when the run fails.
# Returns:
#   0 on success, otherwise the orchestrator's exit status.
run_autopr_case() {
    local mode="$1"
    local sbx="$2"
    local origin="$sbx/$mode-origin.git"
    local repo="$sbx/$mode-work"
    local stub_dir="$sbx/$mode-bin"
    local run_dir="$sbx/$mode-run"
    local build_log="$sbx/$mode-build-log.md"
    local run_output="$sbx/$mode.out"
    local gh_log="$sbx/$mode-gh.log"
    local spec="$repo/BUILD_SPEC.md"
    local branch="codex/REC-94-autopr-$mode"

    mkdir -p "$stub_dir"
    git init --bare -q "$origin"
    git init -q "$repo"
    git -C "$repo" config user.email "harness-reliability@example.invalid"
    git -C "$repo" config user.name "Harness Reliability"
    printf '[project]\nname = "harness-autopr"\nversion = "0.0.0"\n' > "$repo/pyproject.toml"
    printf 'initial\n' > "$repo/README.md"
    cat > "$spec" <<'SPEC'
# BUILD_SPEC - auto-PR robustness test

**Phases:** 1

## Phase 1: Make branch ahead
engine: claude
depends_on: none

### Requirements
- Create phase-output.txt so the build branch has a real committed change.

### Validation
```bash
test -f phase-output.txt
```
SPEC
    # Publish main to the bare origin, then do the work on a feature branch.
    git -C "$repo" add pyproject.toml README.md BUILD_SPEC.md
    git -C "$repo" commit -qm "init"
    git -C "$repo" branch -M main
    git -C "$repo" remote add origin "$origin"
    git -C "$repo" push -q -u origin main
    git -C "$repo" checkout -qb "$branch"

    # claude stub: creates the file the spec's validation looks for.
    cat > "$stub_dir/claude" <<'STUB'
#!/usr/bin/env bash
printf 'implemented\n' > phase-output.txt
printf 'stub claude implemented phase\n'
exit 0
STUB
    # codex stub: always reports convergence.
    cat > "$stub_dir/codex" <<'STUB'
#!/usr/bin/env bash
printf 'VERDICT: CONVERGED\n'
exit 0
STUB
    # gh stub: logs every invocation to $GH_LOG and answers according to
    # $GH_MODE.
    cat > "$stub_dir/gh" <<'STUB'
#!/usr/bin/env bash
printf '%s\n' "$*" >> "$GH_LOG"
if [ "${GH_MODE:-}" = "fail" ]; then
    printf 'simulated gh failure\n' >&2
    exit 1
fi
if [ "$1" = "pr" ] && [ "$2" = "view" ]; then
    case "${GH_MODE:-}" in
        closed)
            if printf '%s\n' "$*" | grep -q -- '--json number,state'; then
                printf '{"number":36,"state":"CLOSED"}\n'
            else
                printf 'https://github.com/example/repo/pull/36\n'
            fi
            exit 0
            ;;
        open)
            if printf '%s\n' "$*" | grep -q -- '--json number,state'; then
                printf '{"number":37,"state":"OPEN"}\n'
            else
                printf 'https://github.com/example/repo/pull/37\n'
            fi
            exit 0
            ;;
    esac
    exit 1
fi
if [ "$1" = "pr" ] && [ "$2" = "create" ]; then
    printf 'https://github.com/example/repo/pull/99\n'
    exit 0
fi
printf 'unexpected gh invocation: %s\n' "$*" >&2
exit 2
STUB
    chmod +x "$stub_dir/claude" "$stub_dir/codex" "$stub_dir/gh"

    local head_commit
    head_commit="$(git -C "$repo" rev-parse HEAD)"
    python3 "$STATUS" init \
        --run-dir "$run_dir" \
        --issue REC-94 \
        --branch "$branch" \
        --worktree "$repo" \
        --spec "$spec" \
        --last-validated-commit "$head_commit" >/dev/null

    PATH="$stub_dir:$PATH" \
    HARNESS_CODEX_BIN="$stub_dir/codex" \
    GH_MODE="$mode" \
    GH_LOG="$gh_log" \
    DISPATCH_RUN_DIR="$run_dir" \
    DISPATCH_ATTEMPT=1 \
    CONVERGE_MAX_ROUNDS=1 \
        bash "$ORCH" --coder claude --dir "$repo" --log "$build_log" --no-codex-spec-review "$spec" > "$run_output" 2>&1
    local rc=$?
    if (( rc != 0 )); then
        echo "Harness auto-PR run failed for $mode (exit $rc). Output:" >&2
        cat "$run_output" >&2
        return "$rc"
    fi

    printf '%s\n' "$run_dir/attempt-001/terminal_status.json"
}

# Check the contents of a terminal status JSON file using embedded Python.
#
# Arguments:
#   $1 - path to the terminal status JSON file.
#   $2 - expected value of its "pr_url" field, or the sentinel "__NONE__" to
#        require that the field is null, empty, or absent.
# The embedded script additionally requires "exit_code" to equal 0.
# Returns:
#   python3's exit status: 0 when every assertion holds, non-zero otherwise
#   (a failed assertion's traceback goes to stderr).
assert_terminal_pr_url() {
    local terminal_status="$1"
    local expected="$2"
    python3 - "$terminal_status" "$expected" <<'PY'
import json
import sys

with open(sys.argv[1], "r", encoding="utf-8") as handle:
    data = json.load(handle)
expected = sys.argv[2]
actual = data.get("pr_url")
if expected == "__NONE__":
    assert actual in (None, ""), f"expected no pr_url, got {actual!r}"
else:
    assert actual == expected, f"expected pr_url {expected!r}, got {actual!r}"
assert data.get("exit_code") == 0, f"build should stay green: {data!r}"
PY
}

# Auto-PR robustness across three gh behaviours:
#   closed - the branch's existing PR is CLOSED: a fresh PR must be created
#            and its URL recorded.
#   open   - the branch's existing PR is OPEN: it must be reused with no
#            `gh pr create`, and its URL recorded.
#   fail   - gh always fails: the build must stay green with no PR URL.
# A scenario whose harness run fails records one failure and ends the test.
test_autopr() {
    local sandbox
    sandbox="$(mktemp -d)"

    local terminal_json

    # --- existing PR is closed ---
    if ! terminal_json="$(run_autopr_case closed "$sandbox")"; then
        no "closed PR auto-PR run stays green"
        rm -rf "$sandbox"
        return
    fi
    if has_f "$(cat "$sandbox/closed-gh.log" 2>/dev/null || true)" "pr create"; then
        ok "closed PR branch creates a fresh PR"
    else
        no "closed PR branch did not invoke gh pr create"
    fi
    if assert_terminal_pr_url "$terminal_json" "https://github.com/example/repo/pull/99"; then
        ok "closed PR branch records the fresh PR URL"
    else
        no "closed PR branch recorded the stale closed PR URL"
    fi

    # --- existing PR is open ---
    if ! terminal_json="$(run_autopr_case open "$sandbox")"; then
        no "open PR auto-PR run stays green"
        rm -rf "$sandbox"
        return
    fi
    if has_f "$(cat "$sandbox/open-gh.log" 2>/dev/null || true)" "pr create"; then
        no "open PR branch invoked gh pr create"
    else
        ok "open PR branch reuses the existing PR without create"
    fi
    if assert_terminal_pr_url "$terminal_json" "https://github.com/example/repo/pull/37"; then
        ok "open PR branch records the existing open PR URL"
    else
        no "open PR branch did not record the existing open PR URL"
    fi

    # --- gh itself fails ---
    if ! terminal_json="$(run_autopr_case fail "$sandbox")"; then
        no "gh failure auto-PR run stays green"
        rm -rf "$sandbox"
        return
    fi
    if assert_terminal_pr_url "$terminal_json" "__NONE__"; then
        ok "gh failure leaves PR URL empty while build stays green"
    else
        no "gh failure changed the green build or recorded a PR URL"
    fi

    rm -rf "$sandbox"
}

# `session_workspace.sh create` must base a new worktree on a freshly fetched
# origin/main even when the local checkout is stale.
#
# Fixture: a bare remote receives commit "base A" from a seed clone; a second
# clone ("repo") is taken at that point; the seed then pushes "base B", which
# leaves repo's main and origin/main one commit behind the remote.
test_worktree_fresh() {
    local sandbox
    sandbox="$(mktemp -d)"

    local origin="$sandbox/origin.git"
    local seed="$sandbox/seed"
    local repo="$sandbox/repo"
    local sessions="$sandbox/sessions"
    local ledger="$sandbox/ledger.jsonl"
    local create_output="$sandbox/create.out"

    # Remote with "base A" on main, and main as the remote's HEAD.
    git init --bare -q "$origin"
    git clone -q "$origin" "$seed" >/dev/null 2>&1
    git -C "$seed" config user.email "harness-reliability@example.invalid"
    git -C "$seed" config user.name "Harness Reliability"
    printf 'base-a\n' > "$seed/README.md"
    git -C "$seed" add README.md
    git -C "$seed" commit -qm "base A"
    git -C "$seed" branch -M main
    git -C "$seed" push -q -u origin main
    git --git-dir="$origin" symbolic-ref HEAD refs/heads/main

    # The checkout under test is cloned while the remote is still at base A.
    git clone -q "$origin" "$repo"
    git -C "$repo" config user.email "harness-reliability@example.invalid"
    git -C "$repo" config user.name "Harness Reliability"

    # Advance the remote to "base B" behind the checkout's back.
    printf 'base-b\n' > "$seed/README.md"
    git -C "$seed" add README.md
    git -C "$seed" commit -qm "base B"
    git -C "$seed" push -q origin main

    local tip tracking_before main_before
    tip="$(git --git-dir="$origin" rev-parse refs/heads/main)"
    tracking_before="$(git -C "$repo" rev-parse origin/main)"
    main_before="$(git -C "$repo" rev-parse main)"

    # Guard: both the tracking ref and local main must lag the remote tip,
    # otherwise the fixture proves nothing.
    if [ "$tracking_before" = "$tip" ] || [ "$main_before" = "$tip" ]; then
        no "fixture did not create a stale local checkout"
        rm -rf "$sandbox"
        return
    fi
    ok "fixture has stale local main and stale origin/main before create"

    if ! REPO_ROOT="$repo" \
        SESSION_WS_ALLOW_ANY_REPO=1 \
        SESSIONS_ROOT="$sessions" \
        LEDGER="$ledger" \
        HOST="harness-host" \
        SID="freshness-sid" \
        bash "$WS" create --actor codex --issue REC-94 --slug worktree-fresh > "$create_output" 2>&1
    then
        no "session_workspace create succeeds from stale local checkout"
        cat "$create_output" >&2
        rm -rf "$sandbox"
        return
    fi

    # The worktree path is taken from the last "worktree: <path>" line that
    # the create command printed.
    local worktree tracking_after parent_commit
    worktree="$(sed -n 's/.*worktree: //p' "$create_output" | tail -1)"
    if [ -z "$worktree" ] || [ ! -e "$worktree/.git" ]; then
        no "session_workspace create did not report a usable worktree"
        rm -rf "$sandbox"
        return
    fi
    ok "session_workspace create produced an isolated worktree"

    tracking_after="$(git -C "$repo" rev-parse origin/main)"
    if [ "$tracking_after" = "$tip" ]; then
        ok "create refreshes origin/main before selecting the base"
    else
        no "create left origin/main stale"
    fi

    # The parent of the worktree's HEAD has to be the remote tip.
    parent_commit="$(git -C "$worktree" rev-parse HEAD^ 2>/dev/null || true)"
    if [ "$parent_commit" = "$tip" ]; then
        ok "created worktree branch is based on fresh origin/main"
    else
        no "created worktree branch was not based on fresh origin/main"
    fi

    rm -rf "$sandbox"
}

# Select the tests to run: the one named by --only, or all of them in a fixed
# order when no name (or "all") was given. An unknown name is a usage error
# (exit 2). The script's exit status is 0 only when no assertion failed.
selected_test="${ONLY:-all}"
case "$selected_test" in
    all)
        tests_to_run=(spec_aware_review block_reason autopr worktree_fresh)
        ;;
    spec_aware_review|block_reason|autopr|worktree_fresh)
        tests_to_run=("$selected_test")
        ;;
    *)
        echo "FATAL: unknown --only test '$ONLY'" >&2
        exit 2
        ;;
esac

for test_name in "${tests_to_run[@]}"; do
    "test_$test_name"
done

printf '%s\n' "--------"
printf 'PASS=%s  FAIL=%s\n' "$PASS" "$FAIL"
[ "$FAIL" -eq 0 ]
