#!/usr/bin/env bash
# Validation gate for harness_orchestrator.sh's dispatch-audit SAFETY gate
# (run_dispatch_audit_gate / dispatch_target_lines / get_dispatch_touching_targets).
#
# Pins the NEVER-WEAKEN property: a spec that genuinely modifies the live
# dispatch path (providers, model_profiles.json, PROMPT_BIBLE) must be BLOCKED
# unless it carries a final audit_dispatch.py phase — REGARDLESS of how its
# "Files to modify" heading is styled (level, prefix, OR capitalization). It ALSO
# pins the two opposing failure modes Codex's adversarial reviews surfaced:
#   - UNDER-match (the dangerous one): commit 5cbf2e76 anchored the opener to a bare
#     lowercase `^### Files to modify`, so flora.py under `### AREA 5. Files to modify`,
#     `## Files to modify`, `### Files to Modify`, `## FILES TO MODIFY`, ... slipped
#     through. → a real dispatch change ships unaudited.
#   - OVER-match (fail-safe but a regression of the 5cbf2e76 docs fix): a too-loose
#     opener lets a PROSE heading that merely contains "files to modify" open file-list
#     mode and over-capture cited paths → a legit docs build is falsely blocked.
# Also pins the false-positive fix itself (dispatch files CITED in Requirements prose
# must NOT be scanned).
#
# Self-contained: local parsing only, no model dispatch, no gh/origin. Uses the
# documented --dry-run preflight, which runs the gate before the dry-run exit.
# Strict-ish mode: unset variables are errors and a pipeline fails if any stage
# fails. `-e` is deliberately absent: failures are tallied in FAIL, not fatal.
set -u
set -o pipefail

# HERE = directory holding this script; ORCH and REPO are resolved relative to it.
HERE=$(cd "$(dirname "$0")" && pwd)
ORCH="${HERE}/../harness_orchestrator.sh"
REPO=$(cd "${HERE}/../../../.." && pwd)

# Running tallies, bumped by ok()/no() and reported at the end of the run.
PASS=0
FAIL=0
# ok <label>: report a passing check and bump PASS. Always returns 0.
ok() {
    printf '  OK: %s\n' "$1"
    PASS=$((PASS + 1))
}

# no <label>: report a failing check and bump FAIL. Always returns 0, so a
# failed assertion never aborts the run; the final exit status reflects FAIL.
no() {
    printf '  FAIL: %s\n' "$1"
    FAIL=$((FAIL + 1))
}

# Preflight: without the orchestrator there is nothing to test, so bail out hard.
if [ ! -f "$ORCH" ]; then
    echo "FATAL: orchestrator not found at $ORCH"
    exit 1
fi

# A syntax error in the orchestrator is recorded as a failed check, not a fatal one.
if bash -n "$ORCH"; then
    ok "orchestrator bash -n clean"
else
    no "orchestrator syntax error"
fi

# Sandbox for generated fixtures; removed on any exit path. A failed mktemp must
# be fatal: with an empty SBX the fixtures below would be written to "/<name>.md".
SBX="$(mktemp -d)" || { echo "FATAL: could not create sandbox directory (mktemp -d failed)"; exit 1; }
trap 'rm -rf "$SBX"' EXIT

# gate_result <spec-file>
# Runs the orchestrator's --dry-run preflight on <spec-file> from inside the
# sandbox and prints exactly one word: BLOCKED iff the gate's refusal message
# appears anywhere in the combined stdout/stderr, PASSED otherwise.
#
# <spec-file> should be an absolute path, because the orchestrator is started
# after a `cd "$SBX"`.
#
# The transcript is matched in-shell rather than with `printf ... | grep -q`:
# this script runs under `set -o pipefail`, and grep -q exits at the first
# match. With a transcript larger than the pipe buffer the writer is then killed
# by SIGPIPE, the pipeline reports failure, and a genuine BLOCKED would be
# misclassified as PASSED.
gate_result() {
    local out
    out="$(cd "$SBX" && bash "$ORCH" --dry-run "$1" 2>&1)"
    case "$out" in
        *'BUILD BLOCKED: dispatch-audit phase required'*) echo BLOCKED ;;
        *) echo PASSED ;;
    esac
}
# assert_gate <fixture-file> <expected BLOCKED|PASSED> <label>
# Classifies the fixture with gate_result and records the outcome through
# ok()/no(); the label is echoed together with the observed verdict.
assert_gate() {
    local fixture="$1" expected="$2" label="$3" actual
    actual="$(gate_result "$fixture")"
    if [ "$actual" != "$expected" ]; then
        no "$label (expected $expected, got $actual)"
        return
    fi
    ok "$label (→ $actual)"
}

# A real dispatch-path modification with NO audit phase. Heading style varies.
# Writes a one-phase spec whose file list sits directly under the caller's
# heading line; the heading is emitted verbatim (tabs and leading blanks kept).
mk_dispatch_spec() { # $1 = heading line, $2 = out file
    local heading="$1" dest="$2"
    printf '%s\n' \
        '# BUILD_SPEC — dispatch-touch test' \
        '' \
        '## Phase 1: Touch the dispatch path' \
        'engine: codex' \
        "$heading" \
        '- `recoil/execution/providers/flora.py` — change a real code path' \
        'Validation command: `true`' \
        > "$dest"
}

# ── NEVER-WEAKEN fixtures: every heading style must BLOCK (no audit phase) ──
# Plain indexed array (macOS bash 3.2 has no `declare -A`).
# Each entry becomes the heading line of a generated spec (see mk_dispatch_spec)
# that lists flora.py and has no audit phase; every one is asserted BLOCKED.
# Order matters only for fixture numbering (weaken_<n>.md).
WEAKEN_HEADINGS=(
  # Baseline, prefixed, alternate heading levels, and create-style headings.
  "### Files to modify"
  "### AREA 5. Files to modify"
  "## Files to modify"
  "#### Files to create"
  "### New files to create"
  # Capitalization variants.
  "### Files to Modify"
  "## FILES TO MODIFY"
  "## Files To Modify"
  # Compound verbs, numbering prefixes, and trailing text after the verb.
  "### Files to create/modify"
  "### Files to create / modify"
  "### Files to modify (3 files)"
  "### Files to create or modify"
  "### N. Files to Modify"
  "### Files to modify:"
  "### Files to modify the registry wiring"
  # Whitespace around the heading marker: a tab after the hashes, leading indentation.
  $'###\tFiles to modify'
  "   ### Files to modify"
  # Other modification verbs.
  "### Files to delete"
  "### Files to touch"
  "### Files to update"
  "### Files to replace"
  "### Files to refactor"
  "### Files to edit"
  "### Files to move"
  "### Files to write"
  # Novel/never-in-corpus modification verbs (Codex R4 allowlist-bypass exploits).
  # The DENYLIST must gate these even though no allowlist enumerates them — this is
  # the assertion that makes the test pin completeness, not just echo a verb list.
  "### Files to rename"
  "### Files to patch"
  "### Files to migrate"
  "### Files to regenerate"
  "### Files to add"
  "### Files to remove"
  "### Files to wire"
  "### Files to rework"
  "### Files to generate"
  # Codex R5 CRITICAL: a later read-only phrase must not flip a modification heading.
  "### Files to modify (files to read first)"
  # Codex R5 MAJOR: internal whitespace (tab / double space) inside "files to <verb>".
  $'### Files to\tmodify'
  "### Files  to modify"
  "### Files to  modify"
)

# READ-ONLY verbs must NOT gate: a spec that merely reads/verifies a dispatch file
# does not touch the dispatch path — gating it would be a false positive.
# Each entry becomes the heading line of a generated spec that lists flora.py and
# has no audit phase; every one is asserted PASSED. The verbs here are the same
# twelve that the RO_VERBS pattern enumerates further down in this script.
READONLY_HEADINGS=(
  "### Files to read"
  "### Files to review"
  "### Files to verify"
  "### Files to inspect"
  "### Files to check"
  "### Files to audit"
  "### Files to reference"
  "### Files to look"
  "### Files to keep"
  "### Files to list"
  "### Files to examine"
  "### Files to see"
)

# Prefixed heading + dispatch file + a real FINAL audit_dispatch.py phase → must PASS.
# Fixture: $SBX/with_audit.md. The quoted 'SPEC' delimiter writes the body
# verbatim, so the backticks and the ```bash fence reach the file unexpanded.
# Phase 1 declares flora.py under a prefixed heading; Phase 2 is the final phase
# and invokes audit_dispatch.py.
cat > "$SBX/with_audit.md" <<'SPEC'
# BUILD_SPEC — prefixed heading with proper audit phase

## Phase 1: Touch the dispatch path
engine: codex
### AREA 5. Files to modify
- `recoil/execution/providers/flora.py` — change a real code path
Validation command: `true`

## Phase 2: Dispatch audit
engine: codex
Run the mandatory end-state audit.
```bash
python3 recoil/pipeline/tools/audit_dispatch.py --project x --episode ep_001
```
Validation command: `true`
SPEC

# FALSE-POSITIVE GUARD #1 (the REC-76 shape that 5cbf2e76 fixed): the Files-to-modify
# section lists only docs; dispatch files are merely CITED in Requirements prose.
# Fixture: $SBX/docs_only.md (written verbatim via the quoted 'SPEC' delimiter).
# The only declared file is a CLAUDE.md doc; flora.py, model_profiles.json and
# PROMPT_BIBLE.yaml appear solely inside the numbered Requirements text.
cat > "$SBX/docs_only.md" <<'SPEC'
# BUILD_SPEC — docs reconciliation

## Phase 1: Reconcile docs
engine: codex
### Files to modify
- `recoil/pipeline/CLAUDE.md` — update the topology section

### Requirements
1. Document that execution/providers/flora.py is the worksurface adapter.
2. Cite model_profiles.json and PROMPT_BIBLE.yaml for context. No code changes.
Validation command: `true`
SPEC

# ANTI-DODGE (the reason we deliberately do NOT carve out phase headings): a spec that
# hides a real dispatch-file declaration under a `## Phase N:` heading whose title
# contains "files to modify" must STILL be caught. A heading-text carve-out would let
# this dodge the fail-closed gate and ship unaudited — strictly worse than the
# theoretical over-block a carve-out would prevent.
# Fixture: $SBX/dodge_attempt.md (written verbatim via the quoted 'SPEC'
# delimiter). The phase heading itself reads "Files to modify", the flora.py
# bullet follows it after the engine line, and there is no audit phase.
cat > "$SBX/dodge_attempt.md" <<'SPEC'
# BUILD_SPEC — dodge attempt

## Phase 3: Files to modify
engine: codex
- `recoil/execution/providers/flora.py` — a real provider change smuggled in
Validation command: `true`
SPEC

# ── Behavioral assertions ─────────────────────────────────────────
# One generated fixture per heading (weaken_<n>.md, numbered from 1); each must BLOCK.
echo "-- NEVER-WEAKEN: a real flora.py modification must BLOCK under EVERY heading style/verb --"
n=0
for heading in "${WEAKEN_HEADINGS[@]}"; do
    n=$((n+1))
    fixture="$SBX/weaken_$n.md"
    mk_dispatch_spec "$heading" "$fixture"
    assert_gate "$fixture" BLOCKED "'$heading' + flora.py, no audit"
done

# Same fixture shape under read-only verbs (readonly_<n>.md); each must PASS.
echo "-- READ-ONLY verbs must PASS (reading/verifying a dispatch file is not a modification) --"
n=0
for heading in "${READONLY_HEADINGS[@]}"; do
    n=$((n+1))
    fixture="$SBX/readonly_$n.md"
    mk_dispatch_spec "$heading" "$fixture"
    assert_gate "$fixture" PASSED "'$heading' + flora.py (read-only reference, no audit)"
done

# Hand-written fixtures, one assertion each.
echo "-- NO OVER-BLOCK: a dispatch-touch spec WITH a final audit phase must PASS --"
assert_gate "$SBX/with_audit.md" PASSED "prefixed heading + flora.py + final audit_dispatch.py phase"

echo "-- FALSE-POSITIVE GUARD (do not re-break the 5cbf2e76 docs fix) --"
assert_gate "$SBX/docs_only.md" PASSED "docs-only Files-to-modify; flora.py only in Requirements prose"

echo "-- ANTI-DODGE: no heading-class carve-out lets a real declaration slip through --"
assert_gate "$SBX/dodge_attempt.md" BLOCKED "flora.py decl. under a '## Phase N: Files to modify' title must STILL block"

# ── Live-spec smoke (the two specs in flight must parse clean) ─────
echo "-- LIVE SPECS: in-flight docs/tooling specs must PASS --"
REC76="$REPO/consultations/recoil/engine-topology-reconcile-2026-06-05/BUILD_SPEC.md"
REC75="$REPO/consultations/tooling/convergence-loop-2026-06-05/BUILD_SPEC.md"

# A spec that is not present in this checkout is counted as a passing skip.
if [ -f "$REC76" ]; then
    assert_gate "$REC76" PASSED "REC-76 engine-topology docs spec"
else
    ok "REC-76 spec absent (skip)"
fi

if [ -f "$REC75" ]; then
    assert_gate "$REC75" PASSED "REC-75 convergence-loop spec"
else
    ok "REC-75 spec absent (skip)"
fi

# ── CORPUS never-weaken: EVERY real create/modify heading in the repo must OPEN ─────
# Runs each distinct heading through the ACTUAL awk extracted from dispatch_target_lines,
# so a future edit that under-matches any real heading style fails here. A heading that
# "opens" emits the flora.py sentinel placed right after it.
echo "-- CORPUS (denylist): read-only-verb headings stay un-gated; EVERY other 'files to <verb>' heading opens --"
# Lift the embedded awk program text out of the orchestrator source:
#   - start grabbing on the line AFTER the one containing `spec_without_fences | awk '`
#   - stop at the first line that holds nothing but a single quote (plus optional
#     whitespace), i.e. the closing quote of that awk program
#   - print every line in between.
# The '\'' sequences below are shell quoting for a literal single quote inside
# this single-quoted awk script. AWKPROG is empty if the opening marker line is
# never found.
AWKPROG="$(awk '
    /spec_without_fences \| awk '\''/ {grab=1; next}
    grab && /^[[:space:]]*'\''[[:space:]]*$/ {exit}
    grab {print}
' "$ORCH")"
# Read-only verb set, as an ERE over a lowercased, single-spaced "files to <verb>"
# phrase. The trailing ([^a-z]|$) keeps e.g. "files to reads" from matching "read".
RO_VERBS='files to (read|review|look|verify|reference|inspect|check|audit|keep|list|examine|see)([^a-z]|$)'

# opens <heading>: succeeds iff the extracted awk program emits the flora.py
# sentinel line placed directly under <heading>.
opens() { printf '%s\n- recoil/execution/providers/flora.py\n' "$1" | awk "$AWKPROG" | grep -q 'flora\.py'; }

# is_readonly <heading>: succeeds iff the FIRST "files to <verb>" phrase in
# <heading> names a read-only verb. Only the first phrase counts, so a heading
# such as "### Files to modify (files to read first)" is a modification heading;
# this matches the never-weaken fixture of that exact shape, which must BLOCK.
# Matching anywhere in the heading would misfile such a corpus heading as
# read-only and report it as WRONGLY GATED. Case is folded and runs of
# blanks/tabs inside the phrase are normalised before RO_VERBS is applied.
is_readonly() {
    local first
    first="$(printf '%s\n' "$1" | awk '{
        s = tolower($0)
        if (match(s, /files[ \t]+to[ \t]+[a-z]+/)) {
            verb = substr(s, RSTART, RLENGTH)
            sub(/^files[ \t]+to[ \t]+/, "", verb)
            print "files to " verb
            exit
        }
    }')"
    [ -n "$first" ] || return 1
    printf '%s\n' "$first" | grep -qE "$RO_VERBS"
}
# Feed every distinct markdown heading in the repo that contains "files to <letter>"
# through the extracted awk program and tally the two possible failure modes:
#   missed = a non-read-only heading that did NOT open the file list
#   leaked = a read-only heading that DID open it
if [ -n "$AWKPROG" ]; then
    missed=0; mtotal=0; leaked=0; rtotal=0
    while IFS= read -r h; do
        [ -n "$h" ] || continue
        if ! is_readonly "$h"; then
            mtotal=$((mtotal+1))
            if ! opens "$h"; then
                missed=$((missed+1))
                echo "    MISSED (modification): $h"
            fi
            continue
        fi
        rtotal=$((rtotal+1))
        if opens "$h"; then
            leaked=$((leaked+1))
            echo "    WRONGLY GATED (read-only): $h"
        fi
    done < <(grep -rhiE '^#{1,6} .*files to [a-z]' "$REPO" --include='*.md' 2>/dev/null | sort -u)

    if [ "$missed" -ne 0 ]; then
        no "$missed/$mtotal non-read-only headings did NOT open (never-weaken hole)"
    else
        ok "all $mtotal distinct non-read-only headings open (no never-weaken miss)"
    fi

    if [ "$leaked" -ne 0 ]; then
        no "$leaked/$rtotal read-only headings wrongly opened (false positive)"
    else
        ok "all $rtotal distinct read-only headings stay un-gated (no false positive)"
    fi
else
    no "could not extract dispatch_target_lines awk program from orchestrator"
fi

# ── Static never-weaken assertions on the awk ─────────────────────
echo "-- STATIC: opener is case-insensitive; Requirements prose is NOT scanned --"
# SRC = the orchestrator text from the `dispatch_target_lines()` line through the
# first line that starts with `}` after it.
SRC="$(awk '/^dispatch_target_lines\(\)/{f=1} f{print} /^}/{if(f)exit}' "$ORCH")"

# Must be present: case folding.
if printf '%s\n' "$SRC" | grep -q 'tolower('; then
    ok "opener matches case-insensitively (tolower)"
else
    no "opener is case-sensitive — capitalized headings (Files to Modify) slip through"
fi

# Must be present: the enumerated read-only alternation (literal "(read|review|look|verify").
if printf '%s\n' "$SRC" | grep -qE '\(read\|review\|look\|verify'; then
    ok "opener is a denylist (gates every verb except an enumerated read-only set)"
else
    no "opener is not a denylist — an allowlist misses future modification verbs (rename/patch/migrate)"
fi

# Must be present: match()-based classification.
if printf '%s\n' "$SRC" | grep -q 'match('; then
    ok "classifies the FIRST files-to verb via match() (R5 anchoring + whitespace fix)"
else
    no "opener does not anchor to the first files-to verb (R5 CRITICAL: a later read-only phrase can flip a modify heading)"
fi

# Must be ABSENT: any in_requirements state.
if printf '%s\n' "$SRC" | grep -q 'in_requirements'; then
    no "Requirements-prose scan reintroduced (re-breaks the 5cbf2e76 docs fix)"
else
    ok "Requirements prose is NOT scanned (false-positive fix preserved)"
fi

# Must be ABSENT: a `tolower($0) ~` test line that mentions "phase".
if printf '%s\n' "$SRC" | grep -E 'tolower\(\$0\) ~' | grep -qi 'phase'; then
    no "opener carves out a heading class (e.g. phase) — a DODGE surface: a real decl could hide under it"
else
    ok "opener has no heading-class carve-out (no never-weaken dodge surface)"
fi

# Final tally. The closing test is the script's last command, so its status
# becomes the exit status: zero only when no check failed.
printf '%s\n' "--------"
printf 'PASS=%s  FAIL=%s\n' "$PASS" "$FAIL"
[ "$FAIL" -eq 0 ]
