#!/bin/bash
# codex_audit.sh — slow, chunked WHOLE-CODEBASE Codex audit (Stage 1 of 2).
#
# Unlike codex_overnight_review.sh (which runs `codex review` over a DIFF), this
# runs `codex exec` as a read-only agent over ONE subsystem per invocation,
# graded against the architectural laws + the SSOT manifest, and emits STRICT
# JSON findings for downstream verification + auto-filing.
#
# Two-stage design (the verify-gate is what keeps this from becoming slop):
#   Stage 1 (this script, unattended/schedulable): codex exec audits one
#     subsystem -> structured findings JSON. READ-ONLY sandbox; it cannot mutate
#     the tree. Report-only — it NEVER files or fixes.
#   Stage 2 (a Claude session, where Linear MCP auth lives): adversarially
#     verify each finding against the live code, dedup vs the open board, and
#     auto-file the survivors to Linear (label: codex-audit).
#
# Why split: the expensive unattended grind is Codex; the judgment + Linear
# filing needs Claude + MCP auth, which headless cron can't be trusted with.
#
# Slow by design: one subsystem per run, rotated via a marker, so a full sweep
# takes ~N nights and stays cost-bounded + low-noise (focused context).
#
# Dispatch overnight via tmux (the /dispatch pattern), e.g.:
#   tmux new -d -s codex-audit "bash ~/CLAUDE_PROJECTS/recoil/pipeline/tools/codex_audit.sh"
# (Runs where Codex.app + auth exist — MacBook for now; Studio once flipped.)
#
# Learnings inherited from codex_overnight_review.sh (REC-13/REC-40 validation):
#   - codex exec MUST have stdin closed.
#   NOTE(review): this bullet was cut off mid-parenthesis in the copy under
#   review; restore any further learnings from the canonical script.
#
# Usage:
#   codex_audit.sh              # audit the next subsystem in the rotation
#   codex_audit.sh SUBSYSTEM    # audit a specific subsystem (path under repo root)
# Env:
#   CODEX_AUDIT_MODEL=MODEL     # override the codex model (default: codex config)

# Unset variables are errors and a pipeline fails if any stage fails. `-e` is
# not enabled: the script checks the steps it cares about explicitly and
# captures the codex exit status itself.
set -uo pipefail

# Fixed locations. Marked readonly so nothing further down can clobber them.
readonly CODEX="/Applications/Codex.app/Contents/Resources/codex"
readonly CLONE="$HOME/Code/recoil-codex"
readonly REPORTDIR="$HOME/CLAUDE_PROJECTS/overnight-reviews/audit"  # gitignored
readonly MARKER="$REPORTDIR/.next_subsystem"                         # rotation index
export PATH="/opt/homebrew/bin:$PATH"                                # ensure ripgrep for Codex

# Rotation list — one audit unit per run.
# Bounded subsystems, SSOT-critical first.
SUBSYSTEMS=(
  "recoil/core"
  "recoil/execution"
  "recoil/execution/providers"
  "recoil/pipeline/orchestrator"
  "recoil/pipeline/_lib"
  "recoil/pipeline/core"
  "recoil/pipeline/tools"
  "recoil/visual"
  "recoil/workspace"
)

# --- Preflight: report dir, codex binary, and the dedicated clone ----------
mkdir -p "$REPORTDIR" || {
  echo "FATAL: cannot create report dir $REPORTDIR" >&2
  exit 1
}
if [ ! -x "$CODEX" ]; then
  echo "FATAL: codex binary not found at $CODEX" >&2
  exit 1
fi
if [ ! -d "$CLONE/.git" ]; then
  echo "FATAL: codex clone missing at $CLONE" >&2
  exit 1
fi

# --- Pick the subsystem: explicit arg overrides rotation -------------------
# ADVANCE records whether the rotation marker should move after a clean run;
# an explicit one-off target never advances it.
IDX=0
if [ "${1:-}" != "" ]; then
  TARGET="$1"
  ADVANCE=0
else
  if [ -f "$MARKER" ]; then
    IDX="$(cat "$MARKER" 2>/dev/null || echo 0)"
  fi
  # An empty or non-numeric marker restarts the rotation. The empty case must
  # be caught here, otherwise `[ "" -ge N ]` below errors out.
  case "$IDX" in
    '' | *[!0-9]*) IDX=0 ;;
  esac
  # Force base 10 so a zero-padded marker such as "08" is not parsed as octal
  # when used as an array subscript.
  IDX=$((10#$IDX))
  if [ "$IDX" -ge "${#SUBSYSTEMS[@]}" ]; then
    IDX=0
  fi
  TARGET="${SUBSYSTEMS[$IDX]}"
  ADVANCE=1
fi

# --- Sync the clone to origin/main -----------------------------------------
# Every step is checked: a failed checkout or reset would otherwise leave a
# stale tree that gets audited and reported as if it were origin/main.
cd "$CLONE" || {
  echo "FATAL: cannot cd to $CLONE" >&2
  exit 1
}
git fetch origin --quiet || {
  echo "FATAL: git fetch failed" >&2
  exit 1
}
git checkout main --quiet 2>/dev/null \
  || git checkout -B main origin/main --quiet \
  || {
    echo "FATAL: git checkout main failed" >&2
    exit 1
  }
git reset --hard origin/main --quiet || {
  echo "FATAL: git reset to origin/main failed" >&2
  exit 1
}

if [ ! -d "$CLONE/$TARGET" ]; then
  echo "FATAL: subsystem '$TARGET' not found in clone" >&2
  exit 1
fi

# --- Per-run output paths ---------------------------------------------------
STAMP="$(date +%Y%m%d-%H%M%S)"
SLUG="${TARGET//\//-}"   # path separators -> dashes, for use in file names
FINDINGS="$REPORTDIR/findings-$SLUG-$STAMP.json"
SCHEMA="$REPORTDIR/.schema.json"
LOG="$REPORTDIR/audit-$SLUG-$STAMP.log"

# Forced-structured output schema — Stage 2 parses this, no scraping.
# --- Schema, prompt, codex invocation, rotation advance ---------------------
# The code below is kept byte-for-byte. Its statements, in order:
#   1. Write a JSON Schema to $SCHEMA from a quoted heredoc (<<'JSON'). The
#      top level requires subsystem, head_sha and findings; each finding
#      requires severity, category, law_or_rule, file, line, title, evidence,
#      recommendation, effort and confidence. additionalProperties is false
#      at both levels.
#   2. Store the short HEAD commit hash in HEAD_SHA.
#   3. Read the audit prompt into PROMPT.
#   4. Run `codex exec` with -s read-only, -C "$CLONE", --skip-git-repo-check,
#      --output-schema "$SCHEMA" and -o "$FINDINGS". -m "$CODEX_AUDIT_MODEL" is
#      added only when that variable is non-empty. The ${MODEL_ARGS[@]+...}
#      form is presumably there so an empty array does not trip `set -u` on
#      older bash — confirm.
#   5. Save the exit status in RC. Only when RC is 0 and ADVANCE is 1, write
#      (IDX + 1) modulo the number of SUBSYSTEMS entries to $MARKER.
#   6. Initialise COUNT to "?".
#
# NOTE(review): all of this sits on one physical line, so the <<'JSON' heredoc
#   has no terminator line of its own and the trailing `#` comments swallow
#   whatever follows them. The line breaks need restoring.
# NOTE(review): `read -r -d '' PROMPT <}"` looks truncated — no heredoc body or
#   terminator is visible. Restore the prompt text from the canonical script;
#   do not reconstruct it by guesswork.
# NOTE(review): "$LOG" follows "$PROMPT" as a second positional argument and
#   only `2>&1` is visible. The file header says codex exec must run with stdin
#   closed, so a `</dev/null >` before "$LOG" appears to be missing — confirm.
cat > "$SCHEMA" <<'JSON' { "type": "object", "additionalProperties": false, "required": ["subsystem", "head_sha", "findings"], "properties": { "subsystem": {"type": "string"}, "head_sha": {"type": "string"}, "findings": { "type": "array", "items": { "type": "object", "additionalProperties": false, "required": ["severity", "category", "law_or_rule", "file", "line", "title", "evidence", "recommendation", "effort", "confidence"], "properties": { "severity": {"type": "string", "enum": ["HIGH", "MEDIUM", "LOW"]}, "category": {"type": "string", "enum": ["SSOT", "architectural-law", "bug", "efficiency", "dead-code", "drift"]}, "law_or_rule": {"type": ["string", "null"]}, "file": {"type": "string"}, "line": {"type": ["integer", "null"]}, "title": {"type": "string"}, "evidence": {"type": "string"}, "recommendation": {"type": "string"}, "effort": {"type": ["string", "null"], "enum": ["S", "M", "L", null]}, "confidence": {"type": "string", "enum": ["high", "medium", "low"]} } } } } } JSON HEAD_SHA="$(git rev-parse --short HEAD)" read -r -d '' PROMPT <}" echo "[audit] running codex exec (read-only)... this is slow + costs tokens." MODEL_ARGS=() [ -n "${CODEX_AUDIT_MODEL:-}" ] && MODEL_ARGS=(-m "$CODEX_AUDIT_MODEL") "$CODEX" exec \ -s read-only \ "${MODEL_ARGS[@]+"${MODEL_ARGS[@]}"}" \ -C "$CLONE" \ --skip-git-repo-check \ --output-schema "$SCHEMA" \ -o "$FINDINGS" \ "$PROMPT" "$LOG" 2>&1 RC=$? # Advance rotation only on a clean run. if [ "$RC" -eq 0 ] && [ "$ADVANCE" -eq 1 ]; then NEXT=$(( (IDX + 1) % ${#SUBSYSTEMS[@]} )) echo "$NEXT" > "$MARKER" fi # Quick human-readable summary count (best-effort; jq optional). COUNT="?" 
# --- Summary -----------------------------------------------------------------
# Count the findings when jq is installed and the findings file exists.
# Otherwise COUNT keeps the value it was given before this point. If jq cannot
# parse the file, the count is shown as "?".
if command -v jq >/dev/null 2>&1 && [ -f "$FINDINGS" ]; then
  COUNT="$(jq '.findings | length' "$FINDINGS" 2>/dev/null || echo '?')"
fi

# One-line result followed by the artifact locations.
echo "exit=$RC subsystem=$TARGET findings=$COUNT"
echo " json: $FINDINGS"
echo " log: $LOG"
echo ""

# Hand-off instructions for the follow-up stage.
echo "NEXT (Stage 2, in a Claude session with Linear MCP):"
echo " Read $FINDINGS, adversarially verify each finding against the live code,"
echo " dedup vs the open Recoil board, and auto-file survivors to Linear"
echo " (team Recoil, label codex-audit, link back to this report)."

# Propagate the codex exit status to the caller.
exit "$RC"