#!/bin/bash
# codex_audit.sh — slow, chunked WHOLE-CODEBASE Codex audit (Stage 1 of 2).
#
# Unlike codex_overnight_review.sh (which runs `codex review` over a DIFF), this
# runs `codex exec` as a read-only agent over ONE subsystem per invocation,
# graded against the architectural laws + the SSOT manifest, and emits STRICT
# JSON findings for downstream verification + auto-filing.
#
# Two-stage design (the verify-gate is what keeps this from becoming slop):
#   Stage 1 (this script, unattended/schedulable): codex exec audits one
#     subsystem -> structured findings JSON. READ-ONLY sandbox; it cannot mutate
#     the tree. Report-only — it NEVER files or fixes.
#   Stage 2 (a Claude session, where Linear MCP auth lives): adversarially
#     verify each finding against the live code, dedup vs the open board, and
#     auto-file the survivors to Linear (label: codex-audit).
#
# Why split: the expensive unattended grind is Codex; the judgment + Linear
# filing needs Claude + MCP auth, which headless cron can't be trusted with.
#
# Slow by design: one subsystem per run, rotated via a marker, so a full sweep
# takes ~N nights and stays cost-bounded + low-noise (focused context).
#
# Dispatch overnight via tmux (the /dispatch pattern), e.g.:
#   tmux new -d -s codex-audit "bash ~/CLAUDE_PROJECTS/recoil/pipeline/tools/codex_audit.sh"
# (Runs where Codex.app + auth exist — MacBook for now; Studio once flipped.)
#
# Learnings inherited from codex_overnight_review.sh (REC-13/REC-40 validation):
#   - codex exec MUST have stdin closed (</dev/null) or it hangs reading stdin.
#   - ripgrep must be on PATH (Codex search); brew installs /opt/homebrew/bin/rg.
#   - Work in the ISOLATED clone (~/Code/recoil-codex), never the live checkout.
#
# Usage:
#   codex_audit.sh                 # audit the next subsystem in rotation
#   codex_audit.sh <subsystem>     # audit a specific subsystem (path under repo root)
# Env:
#   CODEX_AUDIT_MODEL=<model>      # override the codex model (default: codex config)
# Shell options: unset variables are errors, and a pipeline fails when any of
# its stages fails. errexit (-e) is not enabled; exit statuses that matter are
# checked explicitly further down.
set -u
set -o pipefail

# Fixed locations used by the rest of the script.
CODEX=/Applications/Codex.app/Contents/Resources/codex        # codex CLI binary
CLONE="${HOME}/Code/recoil-codex"                              # isolated clone that gets audited
REPORTDIR="${HOME}/CLAUDE_PROJECTS/overnight-reviews/audit"    # gitignored
MARKER="${REPORTDIR}/.next_subsystem"                          # rotation index

# Put Homebrew first on PATH so ripgrep is available to Codex.
PATH="/opt/homebrew/bin:${PATH}"
export PATH

# Rotation list — exactly one entry is audited per run, in this order.
# Each entry is a directory path relative to the repo root; SSOT-critical
# subsystems come first.
SUBSYSTEMS=()
# Core + execution layer.
SUBSYSTEMS+=("recoil/core" "recoil/execution" "recoil/execution/providers")
# Pipeline.
SUBSYSTEMS+=(
  "recoil/pipeline/orchestrator"
  "recoil/pipeline/_lib"
  "recoil/pipeline/core"
  "recoil/pipeline/tools"
)
# Remaining top-level subsystems.
SUBSYSTEMS+=("recoil/visual" "recoil/workspace")

# Preflight: the report directory must exist (every artifact of this run is
# written there), the codex binary must be executable, and the isolated clone
# must be a git checkout. Any failure is fatal (exit 1) before work starts.
if ! mkdir -p "$REPORTDIR"; then
  echo "FATAL: cannot create report dir $REPORTDIR" >&2
  exit 1
fi
if [ ! -x "$CODEX" ]; then echo "FATAL: codex binary not found at $CODEX" >&2; exit 1; fi
if [ ! -d "$CLONE/.git" ]; then echo "FATAL: codex clone missing at $CLONE" >&2; exit 1; fi

#######################################
# Normalise a raw rotation-marker value into a valid array index.
# Arguments: $1 - raw marker content (may be empty, padded, or garbage)
#            $2 - number of entries in the rotation list
# Outputs:   the index to use on stdout; 0 when $1 is empty, contains a
#            non-digit, is longer than 9 digits, or is >= $2
#######################################
_rotation_index() {
  local raw="$1" count="$2"
  case "$raw" in
    '' | *[!0-9]*) echo 0; return 0 ;;
  esac
  # Anything this long cannot be a real index and could overflow arithmetic.
  if [ "${#raw}" -gt 9 ]; then echo 0; return 0; fi
  # Force base 10: a zero-padded value such as "08" would otherwise be parsed
  # as (invalid) octal wherever bash evaluates it arithmetically.
  raw=$(( 10#$raw ))
  if [ "$raw" -ge "$count" ]; then echo 0; return 0; fi
  echo "$raw"
}

# Pick the subsystem: explicit arg overrides rotation.
# Sets TARGET (path under the repo root) and ADVANCE (1 when this run should
# move the rotation marker forward afterwards, 0 for an explicit target).
# IDX is only set on the rotation path.
if [ -n "${1:-}" ]; then
  TARGET="$1"
  ADVANCE=0
else
  RAW_IDX=0
  if [ -f "$MARKER" ]; then
    # An unreadable marker falls back to the start of the rotation.
    RAW_IDX="$(cat "$MARKER" 2>/dev/null || echo 0)"
  fi
  IDX="$(_rotation_index "$RAW_IDX" "${#SUBSYSTEMS[@]}")"
  TARGET="${SUBSYSTEMS[$IDX]}"
  ADVANCE=1
fi

# Bring the isolated clone to exactly origin/main before auditing. Every step
# is fatal on failure: continuing would audit a stale or unexpected tree.
cd "$CLONE" || { echo "FATAL: cannot cd to $CLONE" >&2; exit 1; }
git fetch origin --quiet || { echo "FATAL: git fetch failed" >&2; exit 1; }
# Prefer the existing local main; if that checkout fails, (re)create main
# from origin/main.
if ! git checkout main --quiet 2>/dev/null; then
  git checkout -B main origin/main --quiet \
    || { echo "FATAL: cannot check out main in $CLONE" >&2; exit 1; }
fi
# Discard any local commits or modifications to tracked files in the clone.
git reset --hard origin/main --quiet \
  || { echo "FATAL: git reset --hard origin/main failed" >&2; exit 1; }

# The chosen subsystem must be a directory inside the freshly synced clone.
[ -d "${CLONE}/${TARGET}" ] || {
  echo "FATAL: subsystem '$TARGET' not found in clone" >&2
  exit 1
}

# Per-run artifact names: a timestamp plus the subsystem path with '/'
# flattened to '-'. The schema file has a fixed name and is shared by all runs.
STAMP="$(date '+%Y%m%d-%H%M%S')"
SLUG="$(echo "$TARGET" | tr / -)"
SCHEMA="${REPORTDIR}/.schema.json"
FINDINGS="${REPORTDIR}/findings-${SLUG}-${STAMP}.json"
LOG="${REPORTDIR}/audit-${SLUG}-${STAMP}.log"

# Forced-structured output schema — Stage 2 parses this, no scraping.
# Rewritten on every run to $SCHEMA and passed to codex via --output-schema.
# The heredoc delimiter is quoted ('JSON'), so the body is written verbatim
# with no shell expansion.
# Shape: the top-level object requires subsystem, head_sha and findings.
# Every finding requires all ten fields listed in its "required" array; of
# those, law_or_rule, line and effort may be null. additionalProperties is
# false at both levels, so extra keys are rejected.
cat > "$SCHEMA" <<'JSON'
{
  "type": "object",
  "additionalProperties": false,
  "required": ["subsystem", "head_sha", "findings"],
  "properties": {
    "subsystem": {"type": "string"},
    "head_sha": {"type": "string"},
    "findings": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["severity", "category", "law_or_rule", "file", "line", "title", "evidence", "recommendation", "effort", "confidence"],
        "properties": {
          "severity":       {"type": "string", "enum": ["HIGH", "MEDIUM", "LOW"]},
          "category":       {"type": "string", "enum": ["SSOT", "architectural-law", "bug", "efficiency", "dead-code", "drift"]},
          "law_or_rule":    {"type": ["string", "null"]},
          "file":           {"type": "string"},
          "line":           {"type": ["integer", "null"]},
          "title":          {"type": "string"},
          "evidence":       {"type": "string"},
          "recommendation": {"type": "string"},
          "effort":         {"type": ["string", "null"], "enum": ["S", "M", "L", null]},
          "confidence":     {"type": "string", "enum": ["high", "medium", "low"]}
        }
      }
    }
  }
}
JSON

# Short SHA of the clone's current HEAD (the tree being audited). It is
# embedded in the prompt so the model reports it back as head_sha.
HEAD_SHA="$(git rev-parse --short HEAD)"

# Build the audit prompt into PROMPT. The heredoc delimiter is unquoted, so
# ${TARGET} and ${HEAD_SHA} are expanded into the text; everything else is
# literal. `read -d ''` slurps the whole heredoc and -r keeps backslashes as-is.
# NOTE(review): read is expected to return non-zero here because it reaches
# EOF without seeing a NUL delimiter; that is harmless since errexit (-e) is
# not enabled for this script.
read -r -d '' PROMPT <<PROMPT_EOF
You are a precise architectural auditor for the Recoil engine. Audit ONLY the
files under the subsystem directory: ${TARGET}/

RUBRIC — read these two files first; they are the grading key:
  1. consultations/architectural-laws-general/SYNTHESIS.md  (the architectural laws)
  2. recoil/architecture/ssot_manifest.yaml                 (the canonical SSOT / capability map)

Find concrete violations and improvement opportunities IN ${TARGET}/ ONLY:
  - SSOT violations: state with more than one canonical home; forked/duplicated
    resolvers, config, or constants; manifest entries that contradict the code.
  - Architectural-law violations: cite which law from SYNTHESIS.md.
  - Genuine bugs: logic that is wrong on the live path (not hypothetical).
  - Dead code: unreachable branches, unused exports, write-only state.
  - Efficiency: redundant work, repeated I/O, obvious complexity wins.
  - Drift: code that contradicts its own docstring/comment or the manifest.

EVIDENCE DISCIPLINE (this is what makes the audit usable, not noise):
  - Every finding MUST cite a real file path (relative to repo root) and, where
    possible, a line number, plus a concrete quoted observation from the code.
  - Audit against THIS codebase + the rubric — NOT generic best practices.
    Do NOT emit "consider adding tests / type hints / docstrings" style noise.
  - Prefer a few HIGH-confidence findings over many shallow ones. If you are
    not sure a thing is real, set confidence "low" or omit it.
  - Do not modify any files. This is read-only analysis.

Return ONLY the JSON object conforming to the provided output schema, with
subsystem="${TARGET}" and head_sha="${HEAD_SHA}". If you find nothing
substantive, return an empty findings array — that is a valid, good result.
PROMPT_EOF

# Progress banner on stdout (codex's own output goes to the log file below).
printf '%s\n' \
  "[audit] subsystem=$TARGET head=$HEAD_SHA model=${CODEX_AUDIT_MODEL:-<codex-default>}" \
  "[audit] running codex exec (read-only)... this is slow + costs tokens."

# Assemble the codex argv in one array. The optional model override sits
# right after the sandbox flag; with CODEX_AUDIT_MODEL unset or empty no -m
# is passed at all.
MODEL_ARGS=()
CODEX_ARGV=(exec -s read-only)
if [ -n "${CODEX_AUDIT_MODEL:-}" ]; then
  MODEL_ARGS=(-m "$CODEX_AUDIT_MODEL")
  CODEX_ARGV+=("${MODEL_ARGS[@]}")
fi
CODEX_ARGV+=(
  -C "$CLONE"
  --skip-git-repo-check
  --output-schema "$SCHEMA"
  -o "$FINDINGS"
  "$PROMPT"
)

# stdin is closed on purpose (codex exec hangs reading it otherwise); stdout
# and stderr both go to the per-run log. RC must be captured immediately.
"$CODEX" "${CODEX_ARGV[@]}" </dev/null >"$LOG" 2>&1
RC=$?

#######################################
# Compute the rotation index that follows the current one, wrapping around.
# Arguments: $1 - current index (digits; empty is treated as 0)
#            $2 - number of entries in the rotation list (must be > 0)
# Outputs:   next index on stdout
#######################################
_next_rotation_index() {
  local cur="${1:-0}" count="$2"
  # 10# forces base 10 so a zero-padded index like "08" is not read as octal.
  echo $(( (10#$cur + 1) % count ))
}

# Advance rotation only on a clean run: codex exited 0 AND actually wrote a
# non-empty findings file. Without the second check a run that produced
# nothing would still skip this subsystem until the next full sweep.
if [ "$RC" -eq 0 ] && [ "$ADVANCE" -eq 1 ]; then
  if [ -s "$FINDINGS" ]; then
    NEXT="$(_next_rotation_index "${IDX:-0}" "${#SUBSYSTEMS[@]}")"
    echo "$NEXT" > "$MARKER" \
      || echo "WARN: could not update rotation marker $MARKER" >&2
  else
    echo "WARN: codex exited 0 but wrote no findings to $FINDINGS; rotation not advanced" >&2
  fi
fi

#######################################
# Best-effort count of findings in a report file (jq optional).
# Arguments: $1 - path to the findings JSON
# Outputs:   the number of entries in .findings on stdout, or "?" when jq is
#            not installed, the file is missing/empty/unparseable, or
#            .findings is not an array
#######################################
_findings_count() {
  local file="$1" n
  if ! command -v jq >/dev/null 2>&1 || [ ! -f "$file" ]; then
    echo '?'
    return 0
  fi
  # jq prints nothing (and still exits 0) for an empty file, and `length`
  # happily measures non-arrays, so only accept an array and validate the
  # output instead of trusting the exit status alone.
  n="$(jq '.findings | if type == "array" then length else empty end' "$file" 2>/dev/null)" || n=""
  case "$n" in
    '' | *[!0-9]*) echo '?' ;;
    *) echo "$n" ;;
  esac
}

# Quick human-readable summary count (best-effort; jq optional).
COUNT="$(_findings_count "$FINDINGS")"

# Final report on stdout: codex exit status, audited subsystem, findings
# count, where the artifacts live, and the hand-off instructions for Stage 2.
# One printf call emits each argument as its own line.
printf '%s\n' \
  "exit=$RC  subsystem=$TARGET  findings=$COUNT" \
  "  json:  $FINDINGS" \
  "  log:   $LOG" \
  "" \
  "NEXT (Stage 2, in a Claude session with Linear MCP):" \
  "  Read $FINDINGS, adversarially verify each finding against the live code," \
  "  dedup vs the open Recoil board, and auto-file survivors to Linear" \
  "  (team Recoil, label codex-audit, link back to this report)."

# Propagate codex's exit status as the script's own.
exit $RC
