"""run_shot -- Shot-level meta-op for Phase 3 production cutover.

Wraps StepRunner.execute_keyframe / execute_video in an outer attempt loop
with budget enforcement, FailureMode action routing, and review queue escalation.

INVARIANT: run_shot() NEVER raises on expected failure (D6).
All 7 terminal statuses are returned as OpResult data.

Action routing (spec Section 1, FailureMode Action Map):
  ACCEPT          -> ok (notes in validation_notes)
  AUTO_REROLL     -> feedback agent inner attempts -> REVIEW_QUEUE on exhaust
  SOFTEN_RETRY    -> prompt.soften + retry once -> REVIEW_QUEUE on double failure
  REVIEW_QUEUE    -> immediate review queue entry
  BUDGET_STOP     -> budget_exhausted status
"""

from __future__ import annotations

import logging
import traceback
from pathlib import Path
from typing import Any

from recoil.pipeline._lib.coverage_context import (
    CoveragePassContext,
    OpResult,
    StopOnReview,
)
from recoil.core.critic import FailureMode
from recoil.core.paths import ProjectPaths
from recoil.pipeline._lib.budget_manager import BudgetGuard
from recoil.pipeline._lib import ops_log
from recoil.pipeline._lib import review_queue as rq
from recoil.pipeline._lib.prompt_soften import needs_softening, soften_prompt, is_model_soften_eligible

from recoil.pipeline.core.dispatch import dispatch
from recoil.pipeline.core.dispatch_context import DispatchContext
from recoil.pipeline.core.cost import read_cost_from_result
from recoil.core.exceptions import ModelProfileLookupError
from recoil.pipeline._lib.sanctioned_fallbacks import (
    FallbackRecord,
    fire_sanctioned_fallback,
    register_sanctioned_fallback,
)


# Tenet 6: register the budget-pre-flight fallback so the cost-estimate
# default for unregistered models is named + observable per the registry.
# run_shot() fires this record by name (fire_sanctioned_fallback) when
# _get_estimated_cost() raises ModelProfileLookupError, and then uses
# ESTIMATED_COST_PER_ATTEMPT as the pre-flight estimate.
# NOTE: this call runs as an import-time side effect of the module.
register_sanctioned_fallback(
    FallbackRecord(
        name="budget_estimate_unknown_model_default",
        justification=(
            "model_profiles.json lookup for cost-per-attempt fails (unknown model "
            "id, schema mismatch). The pre-flight budget check still needs a "
            "number; ESTIMATED_COST_PER_ATTEMPT is the documented default."
        ),
        quality_neutrality_argument=(
            "The substitution flows ONLY into the per-shot pre-flight budget cap "
            "decision. Actual generation cost is read from the provider's "
            "RunResult metadata (via read_cost_from_result), so the fallback "
            "cannot affect generation bytes; at worst it lets one extra shot "
            "through a budget cap or trips the cap one shot early."
        ),
        expected_substitution=(
            "ESTIMATED_COST_PER_ATTEMPT ($0.15) in place of the model_profiles "
            "lookup result"
        ),
        introduced_in="Phase E.7",
    )
)

# Register the fallback used for the per-model ``enable_sibling_refs`` flag.
# run_shot() fires this record by name when the model-profile lookup raises
# KeyError, and then treats sibling refs as enabled (True).
# NOTE: this call runs as an import-time side effect of the module.
register_sanctioned_fallback(
    FallbackRecord(
        name="model_profile_feature_flag_default",
        justification=(
            "model_profiles.json lookup for the per-model enable_sibling_refs "
            "flag fails (unknown model id, missing key). The flag default True "
            "is the documented behavior; equivalent to 'no profile override'."
        ),
        quality_neutrality_argument=(
            "The flag controls inclusion of sibling refs in prompt assembly. "
            "Defaulting True matches the un-overridden behavior; the substitution "
            "produces the same prompt the model would have received with no "
            "model_profiles.json entry. No generation byte change."
        ),
        expected_substitution=(
            "True (in place of profile.get('enable_sibling_refs', True))"
        ),
        introduced_in="Phase E.8",
    )
)

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Action Map -- static dict mapping FailureMode -> action config
# ---------------------------------------------------------------------------
#
# Keys read by _resolve_action():
#   "action"       -- action string for single-path modes
#   "action_soft"  -- IDENTITY_DRIFT only: used when no hard failures are present
#   "action_hard"  -- IDENTITY_DRIFT only: used when hard failures are present
# "inner_attempts" and "strategy" are not read by the helpers in this part of
# the module -- presumably consumed by the AUTO_REROLL / feedback handling;
# confirm against that code before changing them.
#
# A FailureMode with no entry here resolves to "REVIEW_QUEUE" in
# _resolve_action(). The BUDGET_STOP action named in the module docstring has
# no entry: budget exhaustion is decided by the BudgetGuard checks in run_shot().

ACTION_MAP: dict[FailureMode, dict[str, Any]] = {
    FailureMode.NONE: {"action": "ACCEPT"},
    FailureMode.ANATOMY_FACE_MERGE: {
        "action": "AUTO_REROLL", "inner_attempts": 2, "strategy": "anatomy_anchor",
    },
    FailureMode.ANATOMY_LIMB_MISCOUNT: {
        "action": "AUTO_REROLL", "inner_attempts": 2, "strategy": "anatomy_anchor",
    },
    # Dual-path entry: deliberately has no plain "action" key; the soft/hard
    # choice is made by _resolve_action() via _is_identity_drift_hard().
    FailureMode.IDENTITY_DRIFT: {
        "action_soft": "ACCEPT",
        "action_hard": "AUTO_REROLL",
        "inner_attempts": 1,
        "strategy": "ref_prune_and_anchor",
    },
    FailureMode.BACKGROUND_CONTAMINATION: {"action": "ACCEPT"},
    FailureMode.WARDROBE_MISMATCH: {"action": "ACCEPT"},
    FailureMode.LIGHTING_MISMATCH: {"action": "ACCEPT"},
    FailureMode.GRID_INFLUENCE: {
        "action": "AUTO_REROLL", "inner_attempts": 1, "strategy": "reduce_refs",
    },
    FailureMode.SAFETY_SOFTENED: {"action": "SOFTEN_RETRY", "inner_attempts": 1},
    # Unclassified failures go straight to human review.
    FailureMode.UNKNOWN: {"action": "REVIEW_QUEUE"},
    FailureMode.MOTION_FAILURE: {
        "action": "AUTO_REROLL", "inner_attempts": 1, "strategy": "seed",
    },
    FailureMode.END_FRAME_DRIFT: {"action": "ACCEPT"},
    FailureMode.CONTENT_FILTER_HARD_BLOCK: {
        "action": "SOFTEN_RETRY", "inner_attempts": 1,
    },
    FailureMode.REF_BLEED: {
        "action": "AUTO_REROLL", "inner_attempts": 1, "strategy": "reduce_refs",
    },
    FailureMode.AUDIO_SYNC_DRIFT: {"action": "ACCEPT"},
    FailureMode.COVERAGE_GEOMETRY_BROKEN: {"action": "ACCEPT"},
}

# Upper bound on the outer attempt loop in run_shot() (attempts are numbered
# 1..OUTER_ATTEMPT_CAP).
OUTER_ATTEMPT_CAP = 4
# Per-attempt cost estimate in USD for the budget pre-checks. Used when the
# model profile yields no usable cost, and as the sanctioned fallback when the
# profile lookup itself fails.
ESTIMATED_COST_PER_ATTEMPT = 0.15  # conservative default; overridden by model cost


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _step_attr(step_result, name: str, default=None):
    """Fetch a legacy StepResult field from a StepResult or a RunResult.

    Post-CP-5 RunResult objects carry cost_usd / final_state / gate_verdict /
    take_index / model / pipeline inside their ``metadata`` dict, whereas a
    StepResult exposes them as plain attributes. A key present in a dict
    ``metadata`` always wins (even when its value is None); otherwise the
    attribute of the same name is read, with ``default`` as the last resort.
    """
    metadata = getattr(step_result, "metadata", None)
    # No dict metadata, or the key is not in it: use the attribute path.
    if not isinstance(metadata, dict) or name not in metadata:
        return getattr(step_result, name, default)
    return metadata[name]


def extract_failure_mode(step_result) -> FailureMode:
    """Map StepResult/RunResult to a FailureMode enum value.

    Priority:
    1. CriticResult.dominant_failure_mode (typed, from Dimension.failure_mode)
    2. gate_verdict.details.failure_category string (from explicit gates)
    3. gate_verdict.gate_name keyword matching (legacy fallback)
    4. step_result.error string matching (for inline video critics)
    5. FailureMode.NONE (no failure detected)

    Priorities 1-3 apply only when a gate verdict is present and did not
    pass; such a verdict always produces a result (FailureMode.UNKNOWN when
    nothing matches). Priorities 4-5 apply in every other case.

    Args:
        step_result: Result object carrying ``gate_verdict`` / ``final_state``
            (as attributes or inside a ``metadata`` dict) and, optionally, an
            ``error`` attribute.

    Returns:
        The resolved FailureMode. An error string that matches none of the
        keywords, combined with a final_state containing neither
        ``pending_qc`` nor ``video_semantic_failed``, resolves to
        FailureMode.NONE.
    """
    # ── Priority 1: typed dominant_failure_mode from CriticResult ─────
    gv = _step_attr(step_result, "gate_verdict")
    if gv is not None and not gv.passed:
        details = getattr(gv, "details", None) or {}
        if not isinstance(details, dict):
            # Malformed details payload: treat it as carrying no information.
            details = {}

        # Read critic_result from details dict (set by StepRunner gates)
        critic_result = details.get("critic_result")
        if critic_result is not None:
            dfm = getattr(critic_result, "dominant_failure_mode", None)
            if isinstance(dfm, FailureMode):
                return dfm

        # ── Priority 2: gate_verdict.details.failure_category string ──
        raw_cat = details.get("failure_category", "")
        # Try direct enum lookup
        try:
            return FailureMode(raw_cat)
        except ValueError:
            pass

        # The key may be present with None (or another non-string) as its
        # value; the substring checks below need a real string, otherwise
        # ``"identity" in cat`` raises TypeError.
        cat = raw_cat if isinstance(raw_cat, str) else ""

        # ── Priority 3: gate_name keyword matching (legacy) ───────────
        gn = (getattr(gv, "gate_name", None) or "").lower()
        if "identity" in gn or "identity" in cat:
            return FailureMode.IDENTITY_DRIFT
        if "anatomy" in gn or "face_merge" in cat:
            return FailureMode.ANATOMY_FACE_MERGE
        if "content_filter" in cat or "safety" in gn:
            return FailureMode.SAFETY_SOFTENED
        return FailureMode.UNKNOWN

    # ── Priority 4: step_result.error string matching ─────────────────
    error = (getattr(step_result, "error", "") or "").lower()
    final_state = (_step_attr(step_result, "final_state", "") or "").lower()

    # No error text and no failure-like final state: nothing to classify.
    if not error and final_state not in ("pending_qc", "video_semantic_failed"):
        return FailureMode.NONE

    # Map error keywords to FailureMode (first match wins, order matters)
    if "content_filter" in error or "content_policy" in error or "safety" in error:
        return FailureMode.CONTENT_FILTER_HARD_BLOCK
    if "frame_extraction" in error or "no frames" in error or "corrupted" in error:
        return FailureMode.MOTION_FAILURE
    if "identity" in error or "character" in error or "face" in error:
        return FailureMode.IDENTITY_DRIFT
    if "start_frame_critic" in error and "hard fail" in error:
        return FailureMode.IDENTITY_DRIFT
    if "video_frame_critic" in error:
        return FailureMode.MOTION_FAILURE
    if "video_semantic_failed" in final_state:
        return FailureMode.UNKNOWN
    if "pending_qc" in final_state:
        return FailureMode.UNKNOWN

    # ── Priority 5: no failure detected ───────────────────────────────
    return FailureMode.NONE


# Phase D — MF-4: promoted from private to public.
# scripts/capture_phase_c_fixtures.py reached across the package boundary
# via the underscore name; promoting it makes the contract explicit. The
# underscore alias below is retained for one cycle of backwards
# compatibility, so in-module callers (run_shot) and pipeline-internal tests
# (test_run_shot_gates, test_additional_unit, test_phase_3_5_acceptance)
# continue to work. New callers should import `extract_failure_mode`.
_extract_failure_mode = extract_failure_mode


def _is_identity_drift_hard(step_result) -> bool:
    """Check if identity drift has hard failures (structural severity).

    Args:
        step_result: Result object whose ``gate_verdict`` is read from an
            attribute or from its ``metadata`` dict.

    Returns:
        True when the gate verdict's ``details`` dict has a non-empty
        ``hard_failures`` entry. False when there is no gate verdict, when
        ``details`` is missing or not a dict, or when ``hard_failures`` is
        absent or empty.
    """
    gv = _step_attr(step_result, "gate_verdict")
    if gv is None:
        return False
    details = getattr(gv, "details", None)
    # Same guard extract_failure_mode applies: a non-dict details payload
    # carries no usable information and must not raise AttributeError here.
    if not isinstance(details, dict):
        return False
    return bool(details.get("hard_failures"))


def _resolve_action(failure_mode: FailureMode, step_result) -> str:
    """Return the routing action string for ``failure_mode``.

    Modes without an ACTION_MAP entry, and single-path entries without an
    "action" key, resolve to "REVIEW_QUEUE". IDENTITY_DRIFT is the one
    dual-path mode: hard failures on the step result select "action_hard"
    (default "AUTO_REROLL"), anything else selects "action_soft"
    (default "ACCEPT").
    """
    config = ACTION_MAP.get(failure_mode)
    if config is None:
        return "REVIEW_QUEUE"

    # Single-path modes: one configured action.
    if failure_mode != FailureMode.IDENTITY_DRIFT:
        return config.get("action", "REVIEW_QUEUE")

    # IDENTITY_DRIFT: severity decides between the soft and hard action.
    if _is_identity_drift_hard(step_result):
        return config.get("action_hard", "AUTO_REROLL")
    return config.get("action_soft", "ACCEPT")


def _get_estimated_cost(model: str) -> float:
    """Get estimated cost for budget pre-check.

    The profile's ``cost_per_image`` is preferred, then ``cost_per_second``.
    A value that is missing, None or zero is skipped, so a profile that
    carries a null/zero ``cost_per_image`` next to a real ``cost_per_second``
    still yields the real figure. When the profile is found but has no usable
    cost, ``ESTIMATED_COST_PER_ATTEMPT`` is returned.

    Tenet 6: this value feeds the budget cap, so a failed profile lookup is
    NOT silently replaced by the default here -- that would hide both
    unexpected cap trips and silent overruns. The failure is raised as
    ``ModelProfileLookupError`` and the caller decides whether to fall back
    to the estimate explicitly, with annotated provenance.

    Args:
        model: Model ID to look up in the model profiles.

    Returns:
        The estimated cost of one attempt.

    Raises:
        ModelProfileLookupError: The lookup raised KeyError or AttributeError.
    """
    try:
        from recoil.core import model_profiles
        profile = model_profiles.get_profile(model)
        # Chain with ``or`` rather than nested .get() defaults: a key that is
        # present but falsy must not mask the next candidate.
        cost = profile.get("cost_per_image") or profile.get("cost_per_second")
    except (KeyError, AttributeError) as e:
        logger.exception(
            "_get_estimated_cost: model profile lookup failed for model=%s", model,
        )
        raise ModelProfileLookupError(model, message=str(e)) from e
    return cost or ESTIMATED_COST_PER_ATTEMPT


def _ops_log_path(paths) -> Path:
    """Return the ops.log.jsonl location under the project's visual state dir.

    A ProjectPaths is rebuilt from ``paths.project_root`` and its
    ``visual_state_dir`` is used as the parent directory.
    """
    project_paths = ProjectPaths.from_root(paths.project_root)
    state_dir = project_paths.visual_state_dir
    return state_dir / "ops.log.jsonl"


def _review_queue_path(paths) -> Path:
    """Return the review_queue.jsonl location under the project's visual state dir.

    A ProjectPaths is rebuilt from ``paths.project_root`` and its
    ``visual_state_dir`` is used as the parent directory.
    """
    project_paths = ProjectPaths.from_root(paths.project_root)
    state_dir = project_paths.visual_state_dir
    return state_dir / "review_queue.jsonl"


def _build_keyframe_gates(shot: dict, step_runner) -> list:
    """Assemble the keyframe QC gate list for a shot.

    An identity gate is created only when the shot lists ``identity_refs``
    and at least one of those paths exists on disk; otherwise the result is
    an empty list. ``step_runner`` is accepted but not used here.
    """
    from recoil.execution.step_runner import make_identity_gate

    raw_refs = shot.get("identity_refs")
    if not raw_refs:
        return []

    # Normalise every entry to a Path first, then keep the ones that exist.
    as_paths = [ref if isinstance(ref, Path) else Path(ref) for ref in raw_refs]
    existing = [ref for ref in as_paths if ref.exists()]
    if not existing:
        return []

    identity_gate = make_identity_gate(
        ref_paths=existing,
        prompt_skeleton=shot.get("prompt_skeleton"),
        wardrobe_phase_id=shot.get("wardrobe_phase_id"),
        wardrobe_description=shot.get("wardrobe_description"),
        # Fall back to the shot type nested under prompt_data.
        current_shot_type=shot.get("shot_type") or (shot.get("prompt_data") or {}).get("shot_type"),
    )
    return [identity_gate]


# ---------------------------------------------------------------------------
# run_shot
# ---------------------------------------------------------------------------

def run_shot(
    shot: dict,
    store,
    paths,
    budget_guard: BudgetGuard,
    model: str,
    step_runner=None,
    run_id: str = "",
    style_anchor_path: Path | None = None,
    coverage_context: CoveragePassContext | None = None,
    stop_on_review: StopOnReview = StopOnReview.NEVER,
) -> OpResult:
    """Execute a single shot through the full retry/budget/routing pipeline.

    NEVER raises on expected failure (D6). All 7 terminal statuses are
    returned as OpResult data. Unhandled exceptions return status='crashed'.

    Args:
        shot: Shot dict from the plan (must have 'shot_id', 'prompt', optionally
              'pipeline' = 'keyframe'|'video', 'start_frame', 'end_frame', etc.)
        store: ExecutionStore instance
        paths: ProjectPaths instance
        budget_guard: Shared BudgetGuard for episode-level + per-shot budget
        model: Model ID (e.g. 'gemini-3-pro-image-preview', 'kling-v3')
        step_runner: StepRunner instance (lazy-created if None)
        run_id: Parent run ID for review queue entries
        style_anchor_path: Path to style anchor keyframe (or None)
        coverage_context: Coverage pass context for sibling awareness
        stop_on_review: StopOnReview enum for abort behavior

    Returns:
        OpResult with one of 7 terminal statuses.
    """
    shot_id = shot.get("shot_id", "UNKNOWN")
    prompt = shot.get("prompt", "")
    pipeline = shot.get("pipeline", "keyframe")
    total_cost = 0.0
    attempts = 0
    validation_notes: list[str] = []
    last_output_path: str | None = None
    last_failure_mode: FailureMode | None = None
    attempt_records: list[dict] = []
    softened_once = False
    op_id = "op_unknown00000"  # fallback if make_op_id fails
    log_path = _ops_log_path(paths)

    try:
        op_id = ops_log.make_op_id()

        # ── Sibling ref discovery (same scene, backward only) ────────
        sibling_ref_path: Path | None = None
        episode_id = shot.get("episode_id", "")
        scene_index = shot.get("scene_index", shot.get("scene_id"))

        # Feature flag: check model profile
        sibling_refs_enabled = True
        if model:
            try:
                from recoil.core import model_profiles as mp
                profile = mp.get_profile(model)
                sibling_refs_enabled = profile.get("enable_sibling_refs", True)
            except KeyError as e:
                # Narrow catch: mp.get_profile only raises KeyError when the
                # model id isn't registered. Other exceptions (corrupt config,
                # JSON parse errors) MUST propagate per Tenet 6 — falling back
                # to True for those cases would silently override an explicit
                # False on a real model whose profile load failed.
                fire_sanctioned_fallback(
                    "model_profile_feature_flag_default",
                    model=model,
                    error=str(e),
                )
                sibling_refs_enabled = True

        if sibling_refs_enabled and episode_id and store is not None:
            try:
                neighbors = store.get_approved_neighbors(shot_id, episode_id)
                prev = neighbors.get("previous")
                if prev and prev.get("output_path"):
                    prev_shot_data = store.get_shot(prev["shot_id"])
                    prev_scene = None
                    if prev_shot_data:
                        prev_scene = prev_shot_data.get("scene_index")

                    if scene_index is not None and prev_scene is not None and scene_index == prev_scene:
                        candidate = Path(prev["output_path"])
                        if candidate.exists():
                            sibling_ref_path = candidate
                            logger.info(
                                "Sibling ref: using %s from %s (same scene %s)",
                                candidate.name, prev["shot_id"], scene_index,
                            )
                    elif scene_index is None:
                        pass
            except Exception as e:
                logger.debug("Sibling ref lookup failed (non-fatal): %s", e)

        # Log op started
        ops_log.log_op_started(
            log_path,
            op_id=op_id,
            name="run_shot",
            args={"shot_id": shot_id, "model": model, "pipeline": pipeline},
            context={"operator": "run_episode"},
            parent_op_id=run_id or None,
        )

        try:
            estimated_cost = _get_estimated_cost(model)
        except ModelProfileLookupError as e:
            fire_sanctioned_fallback(
                "budget_estimate_unknown_model_default",
                model=model,
                error=str(e),
            )
            estimated_cost = ESTIMATED_COST_PER_ATTEMPT

        # ── Outer attempt loop (cap: 4) ──────────────────────────────────
        for outer_attempt in range(1, OUTER_ATTEMPT_CAP + 1):
            attempts = outer_attempt

            # Budget pre-check (episode-level)
            if budget_guard.would_exceed(estimated_cost):
                # Check if the last attempt produced usable output
                if last_output_path:
                    status = "budget_exhausted_success"
                else:
                    status = "budget_exhausted"
                ops_log.log_op_completed(
                    log_path, op_id=op_id,
                    outputs={"status": status, "output_path": last_output_path},
                    cost=total_cost,
                )
                return OpResult(
                    status=status, shot_id=shot_id, op_id=op_id,
                    output_path=last_output_path, cost_usd=total_cost,
                    attempts=attempts, failure_mode=last_failure_mode.value if last_failure_mode else None,
                    validation_notes=validation_notes,
                    coverage_context=coverage_context.to_dict() if coverage_context else None,
                )

            # Per-shot budget cap
            if budget_guard.would_exceed_per_shot(total_cost + estimated_cost):
                budget_guard.release(estimated_cost)  # release episode reservation
                if last_output_path:
                    status = "budget_exhausted_success"
                else:
                    status = "budget_exhausted"
                ops_log.log_op_completed(
                    log_path, op_id=op_id,
                    outputs={"status": status, "output_path": last_output_path},
                    cost=total_cost,
                )
                return OpResult(
                    status=status, shot_id=shot_id, op_id=op_id,
                    output_path=last_output_path, cost_usd=total_cost,
                    attempts=attempts, failure_mode=last_failure_mode.value if last_failure_mode else None,
                    validation_notes=validation_notes,
                    coverage_context=coverage_context.to_dict() if coverage_context else None,
                )

            # ── Call StepRunner via dispatch ────────────────────────────
            # Build context once per attempt (cheap; frozen dataclass).
            ctx = DispatchContext(
                caller_id="run_shot",
                step_runner=step_runner,
                project=getattr(paths, "project", None),
                episode=None,
            )
            try:
                if pipeline == "video":
                    receipt = dispatch(
                        "video_i2v",
                        {
                            "shot_id": shot_id,
                            "prompt": prompt,
                            "model": model,
                            "start_frame": shot.get("start_frame"),
                            "end_frame": shot.get("end_frame"),
                            "duration": shot.get("duration", 5),
                            "aspect_ratio": shot.get("aspect_ratio", "9:16"),
                            "inputs_snapshot": shot.get("inputs_snapshot"),
                        },
                        context=ctx,
                    )
                    step_result = receipt.run_result
                else:
                    keyframe_gates = _build_keyframe_gates(shot, step_runner)

                    # Inject style anchor as additional identity ref
                    effective_identity_refs = list(shot.get("identity_refs") or [])
                    if style_anchor_path and Path(style_anchor_path).exists():
                        effective_identity_refs.append(Path(style_anchor_path))

                    # Inject sibling ref as continuity anchor (low weight)
                    if sibling_ref_path and sibling_ref_path.exists():
                        effective_identity_refs.append(sibling_ref_path)
                        logger.info("Injected sibling ref as continuity anchor: %s", sibling_ref_path.name)

                    receipt = dispatch(
                        "image_t2i",
                        {
                            "shot_id": shot_id,
                            "prompt": prompt,
                            "model": model,
                            "scene_ref_path": shot.get("scene_ref_path"),
                            "identity_refs": effective_identity_refs,
                            "expression_refs": shot.get("expression_refs"),
                            "aspect_ratio": shot.get("aspect_ratio", "9:16"),
                            "inputs_snapshot": shot.get("inputs_snapshot"),
                            "gates": keyframe_gates,
                        },
                        context=ctx,
                    )
                    step_result = receipt.run_result
            except Exception as gen_err:
                # Generation infrastructure failure -- release reservation
                budget_guard.release(estimated_cost)
                logger.error("StepRunner error for %s attempt %d: %s",
                             shot_id, outer_attempt, gen_err)
                attempt_records.append({
                    "attempt": outer_attempt, "error": str(gen_err),
                })
                continue

            # Post-charge budget — RunResult exposes legacy StepResult
            # fields via .metadata (cost_usd, final_state, gate_verdict, …).
            actual_cost = read_cost_from_result(step_result)
            current_final_state = step_result.metadata.get("final_state") or ""
            total_cost += actual_cost
            budget_guard.charge(actual_cost, reserved_amount=estimated_cost)

            attempt_records.append({
                "attempt": outer_attempt,
                "output": step_result.output_path,
                "cost": actual_cost,
                "final_state": current_final_state,
            })

            # ── Map StepRunner final_state to terminal status ──────────
            # pending_qc -> needs_review (preserves usable output, NOT crashed)
            if current_final_state == "pending_qc":
                last_output_path = step_result.output_path
                rq_entry = rq.enqueue(
                    queue_path=_review_queue_path(paths),
                    project=paths.project,
                    episode_id=shot.get("episode_id", ""),
                    shot_id=shot_id,
                    run_id=run_id,
                    reason="pending_qc",
                    failure_mode="pending_qc",
                    attempts=attempt_records,
                    total_cost_usd=total_cost,
                    original_prompt=prompt,
                )
                ops_log.log_op_completed(
                    log_path, op_id=op_id,
                    outputs={"status": "needs_review", "output_path": last_output_path},
                    cost=total_cost,
                )
                return OpResult(
                    status="needs_review", shot_id=shot_id, op_id=op_id,
                    output_path=last_output_path, cost_usd=total_cost,
                    attempts=attempts, failure_mode="pending_qc",
                    validation_notes=validation_notes,
                    review_queue_id=rq_entry.get("rq_id"),
                    coverage_context=coverage_context.to_dict() if coverage_context else None,
                )

            # icu_escalated -> icu_escalated (pass through)
            if current_final_state == "icu_escalated":
                last_output_path = step_result.output_path
                rq_entry = rq.enqueue(
                    queue_path=_review_queue_path(paths),
                    project=paths.project,
                    episode_id=shot.get("episode_id", ""),
                    shot_id=shot_id,
                    run_id=run_id,
                    reason="icu_escalated",
                    failure_mode="icu_escalated",
                    attempts=attempt_records,
                    total_cost_usd=total_cost,
                    original_prompt=prompt,
                )
                ops_log.log_op_completed(
                    log_path, op_id=op_id,
                    outputs={"status": "icu_escalated", "output_path": last_output_path},
                    cost=total_cost,
                )
                return OpResult(
                    status="icu_escalated", shot_id=shot_id, op_id=op_id,
                    output_path=last_output_path, cost_usd=total_cost,
                    attempts=attempts, failure_mode="icu_escalated",
                    validation_notes=validation_notes,
                    review_queue_id=rq_entry.get("rq_id"),
                    coverage_context=coverage_context.to_dict() if coverage_context else None,
                )

            # Success path
            if step_result.success:
                last_output_path = step_result.output_path

                # Extract failure mode for validation notes (even on success,
                # there may be SOFT findings from deferred gates)
                failure_mode = _extract_failure_mode(step_result)
                last_failure_mode = failure_mode

                if failure_mode == FailureMode.NONE:
                    # Clean pass
                    ops_log.log_op_completed(
                        log_path, op_id=op_id,
                        outputs={"status": "ok", "output_path": last_output_path},
                        cost=total_cost,
                    )
                    return OpResult(
                        status="ok", shot_id=shot_id, op_id=op_id,
                        output_path=last_output_path, cost_usd=total_cost,
                        attempts=attempts, failure_mode=None,
                        validation_notes=validation_notes,
                        coverage_context=coverage_context.to_dict() if coverage_context else None,
                    )

                # Route through action map
                action = _resolve_action(failure_mode, step_result)

                if action == "ACCEPT":
                    # SOFT findings accepted (PC-1 keep-bias)
                    note = f"{failure_mode.value}: accepted (soft finding)"
                    validation_notes.append(note)
                    ops_log.log_op_completed(
                        log_path, op_id=op_id,
                        outputs={"status": "ok", "output_path": last_output_path,
                                 "validation_notes": validation_notes},
                        cost=total_cost,
                    )
                    return OpResult(
                        status="ok", shot_id=shot_id, op_id=op_id,
                        output_path=last_output_path, cost_usd=total_cost,
                        attempts=attempts, failure_mode=failure_mode.value,
                        validation_notes=validation_notes,
                        coverage_context=coverage_context.to_dict() if coverage_context else None,
                    )

                # Non-success handled below
                # (should not reach here on success + non-ACCEPT, but safety net)
                continue

            # ── Failed step result ─────────────────────────────────────
            last_output_path = step_result.output_path
            failure_mode = _extract_failure_mode(step_result)
            last_failure_mode = failure_mode

            # Check for content filter errors (might not have a gate_verdict)
            if step_result.error and needs_softening(step_result.error or ""):
                failure_mode = FailureMode.CONTENT_FILTER_HARD_BLOCK
                last_failure_mode = failure_mode

            action = _resolve_action(failure_mode, step_result)

            # ── ACCEPT action (on failure path -- deferred/soft) ───────
            if action == "ACCEPT":
                note = f"{failure_mode.value}: accepted (soft finding)"
                validation_notes.append(note)
                # If we have usable output, return ok
                if last_output_path:
                    ops_log.log_op_completed(
                        log_path, op_id=op_id,
                        outputs={"status": "ok", "output_path": last_output_path,
                                 "validation_notes": validation_notes},
                        cost=total_cost,
                    )
                    return OpResult(
                        status="ok", shot_id=shot_id, op_id=op_id,
                        output_path=last_output_path, cost_usd=total_cost,
                        attempts=attempts, failure_mode=failure_mode.value,
                        validation_notes=validation_notes,
                        coverage_context=coverage_context.to_dict() if coverage_context else None,
                    )
                # No output -- continue to next attempt
                continue

            # ── REVIEW_QUEUE action (immediate escalation) ─────────────
            if action == "REVIEW_QUEUE":
                rq_entry = rq.enqueue(
                    queue_path=_review_queue_path(paths),
                    project=paths.project,
                    episode_id=shot.get("episode_id", ""),
                    shot_id=shot_id,
                    run_id=run_id,
                    reason="immediate_review",
                    failure_mode=failure_mode.value,
                    attempts=attempt_records,
                    total_cost_usd=total_cost,
                    original_prompt=prompt,
                )
                ops_log.log_op_completed(
                    log_path, op_id=op_id,
                    outputs={"status": "needs_review", "output_path": last_output_path},
                    cost=total_cost,
                )
                return OpResult(
                    status="needs_review", shot_id=shot_id, op_id=op_id,
                    output_path=last_output_path, cost_usd=total_cost,
                    attempts=attempts, failure_mode=failure_mode.value,
                    validation_notes=validation_notes,
                    review_queue_id=rq_entry.get("rq_id"),
                    coverage_context=coverage_context.to_dict() if coverage_context else None,
                )

            # ── SOFTEN_RETRY action ────────────────────────────────────
            if action == "SOFTEN_RETRY":
                if not softened_once and is_model_soften_eligible(model):
                    softened = soften_prompt(prompt, shot_id, model)
                    if softened:
                        prompt = softened
                        softened_once = True
                        continue  # retry with softened prompt on next outer attempt
                # Double failure or not eligible -- REVIEW_QUEUE
                rq_entry = rq.enqueue(
                    queue_path=_review_queue_path(paths),
                    project=paths.project,
                    episode_id=shot.get("episode_id", ""),
                    shot_id=shot_id,
                    run_id=run_id,
                    reason="soften_retry_exhausted",
                    failure_mode=failure_mode.value,
                    attempts=attempt_records,
                    total_cost_usd=total_cost,
                    original_prompt=shot.get("prompt", ""),
                    extra={"softened_prompt": prompt} if softened_once else None,
                )
                ops_log.log_op_completed(
                    log_path, op_id=op_id,
                    outputs={"status": "needs_review", "output_path": last_output_path},
                    cost=total_cost,
                )
                return OpResult(
                    status="needs_review", shot_id=shot_id, op_id=op_id,
                    output_path=last_output_path, cost_usd=total_cost,
                    attempts=attempts, failure_mode=failure_mode.value,
                    validation_notes=validation_notes,
                    review_queue_id=rq_entry.get("rq_id"),
                    coverage_context=coverage_context.to_dict() if coverage_context else None,
                )

            # ── AUTO_REROLL action ─────────────────────────────────────
            if action == "AUTO_REROLL":
                # The inner feedback attempts are handled by StepRunner's own
                # retry loop (max_gate_retries). At the run_shot level, we just
                # continue to the next outer attempt. The feedback agent is
                # invoked by StepRunner internally.
                # If we've reached here, StepRunner already exhausted its inner
                # retries for this attempt -- continue to next outer attempt.
                continue

        # ── All outer attempts exhausted ───────────────────────────────
        rq_entry = rq.enqueue(
            queue_path=_review_queue_path(paths),
            project=paths.project,
            episode_id=shot.get("episode_id", ""),
            shot_id=shot_id,
            run_id=run_id,
            reason="attempts_exhausted",
            failure_mode=last_failure_mode.value if last_failure_mode else "unknown",
            attempts=attempt_records,
            total_cost_usd=total_cost,
            original_prompt=shot.get("prompt", ""),
        )
        ops_log.log_op_completed(
            log_path, op_id=op_id,
            outputs={"status": "attempts_exhausted", "output_path": last_output_path},
            cost=total_cost,
        )
        return OpResult(
            status="attempts_exhausted", shot_id=shot_id, op_id=op_id,
            output_path=last_output_path, cost_usd=total_cost,
            attempts=attempts, failure_mode=last_failure_mode.value if last_failure_mode else None,
            validation_notes=validation_notes,
            review_queue_id=rq_entry.get("rq_id"),
            coverage_context=coverage_context.to_dict() if coverage_context else None,
        )

    except Exception:
        # NEVER raise on expected failure (D6).
        # Unhandled exceptions -> crashed status.
        tb = traceback.format_exc()
        logger.error("run_shot CRASHED for %s: %s", shot_id, tb)
        try:
            ops_log.log_op_crashed(
                log_path, op_id=op_id,
                error=f"Unhandled exception in run_shot for {shot_id}",
                traceback_text=tb,
            )
        except Exception as log_err:
            logger.exception(
                "run_shot: ops_log secondary failure (suppressed): %s",
                log_err,
            )
            pass  # Don't let logging crash prevent OpResult return
        return OpResult(
            status="crashed", shot_id=shot_id, op_id=op_id,
            output_path=last_output_path, cost_usd=total_cost,
            attempts=attempts, failure_mode=last_failure_mode.value if last_failure_mode else None,
            validation_notes=validation_notes,
            coverage_context=coverage_context.to_dict() if coverage_context else None,
        )
