"""recoil.pipeline._lib.sanctioned_fallbacks — Tenet 6 sanctioned-fallback registry.

Tenet 6 (Errors Must Be Visible) carves out a permitted class of fallback
behavior. A fallback is sanctioned (compliant) only if all three hold:

    1. NAMED. Appears in this registry with a one-line justification.
       Anonymous fallbacks are violations.
    2. OBSERVABLE. Every firing emits a structured log line (WARNING unless
       the record sets a different `log_level`) via
       `fire_sanctioned_fallback(...)`. The operator can grep
       "FALLBACK_FIRED" and see the full list.
    3. QUALITY-NEUTRAL. The substitution cannot affect the bytes of any
       generated artifact (script, still, video, audio).

Failing any prong: the path is a Tenet 6 violation that must be FIXED
(raise / typed-failure return), not registered.

Public API:
    register_sanctioned_fallback(record): adds a registration. Idempotent
        for byte-identical records; raises DuplicateFallbackRegistration when
        the same name is re-registered with different content.
    fire_sanctioned_fallback(name, **context): logs FALLBACK_FIRED and
        returns the substitution (computed by the registrant's substitute_fn,
        or None if the registrant only documents — caller substitutes inline).
    get_sanctioned_fallback(name): returns the FallbackRecord or raises
        KeyError if no such fallback is registered.
    list_sanctioned_fallbacks(): returns the registry as a list of records,
        for diagnostics / Phase 11 audit.
"""

from __future__ import annotations

import logging
import threading
from dataclasses import dataclass
from typing import Any, Callable, Optional

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class FallbackRecord:
    """Immutable description of a single sanctioned fallback.

    Instances are frozen: once built (and registered) a record cannot be
    edited in place, and two records compare equal only when every field
    matches. Only the first four fields are mandatory at construction time.
    """

    name: str
    """Registry key. Stable, and unique across all registered fallbacks."""

    justification: str
    """Operator-readable reason the fallback exists, kept to one line."""

    quality_neutrality_argument: str
    """Required argument for why the substitution cannot change the bytes of
    any generated artifact."""

    expected_substitution: str
    """Operator-readable description of the value (or shape) substituted."""

    log_level: int = logging.WARNING
    """Logging level used for FALLBACK_FIRED emissions; WARNING by default."""

    substitute_fn: Optional[Callable[..., Any]] = None
    """Optional callable that derives the substitution from the context
    kwargs. When left as None, fire_sanctioned_fallback only emits the log
    line and returns None, and the caller computes the substitution inline."""

    introduced_in: str = ""
    """Provenance note: the phase / CP that introduced this fallback."""

    retire_by: Optional[str] = None
    """Retirement deadline as an ISO 8601 date (YYYY-MM-DD), for
    transition-mechanism fallbacks that must die by a date. None marks a
    permanent edge-case recovery. Policy: a deadline that has already passed
    triggers a boot-time RuntimeError (this class performs no date check
    itself)."""

    retire_reason: Optional[str] = None
    """One-line operator-readable explanation of the retirement. Required iff
    retire_by is set; /spec-review rejects entries that pair retire_by with an
    empty retire_reason."""


# Module-level registry, keyed by FallbackRecord.name. Per Tenet 4, this is
# the single home; consumers go through register_sanctioned_fallback /
# fire_sanctioned_fallback / get_sanctioned_fallback rather than touching
# these globals directly.
_REGISTRY: dict[str, FallbackRecord] = {}
# Guards every read and write of _REGISTRY in this module so import-time
# registration stays consistent. It is a plain (non-reentrant)
# threading.Lock, so it must never be held while calling back into the
# registry functions.
_LOCK = threading.Lock()


class DuplicateFallbackRegistration(ValueError):
    """Raised when a name is re-registered with a record that differs.

    Subclasses ``ValueError`` and is part of the public surface so callers
    (for example tests that race import side effects) can catch exactly
    this condition instead of a broad ``except ValueError``.
    """


def _is_blank(value: object) -> bool:
    """Return True for falsy values and for strings that are only whitespace."""
    return not value or (isinstance(value, str) and not value.strip())


def register_sanctioned_fallback(record: FallbackRecord) -> None:
    """Validate ``record`` and add it to the registry.

    Re-registering a record equal to the one already stored under the same
    name is a no-op, so repeated imports are harmless.

    Args:
        record: The fallback to register. ``name``, ``justification``,
            ``quality_neutrality_argument`` and ``expected_substitution``
            must contain non-whitespace text. If ``retire_by`` is set it
            must be a canonical ISO 8601 date (``YYYY-MM-DD``) and
            ``retire_reason`` must be non-blank.

    Raises:
        ValueError: A required field is blank, ``retire_by`` is not a
            ``YYYY-MM-DD`` date, or ``retire_by`` is set without a
            ``retire_reason``.
        DuplicateFallbackRegistration: A different record is already
            registered under ``record.name`` (catches accidental
            dual-registration from import side effects).
    """
    # Whitespace-only text is treated as missing: it would satisfy a plain
    # truthiness check while still leaving the fallback effectively unnamed
    # or unjustified.
    if _is_blank(record.name):
        raise ValueError("FallbackRecord.name is required")
    if _is_blank(record.justification):
        raise ValueError(f"FallbackRecord({record.name!r}).justification is required")
    if _is_blank(record.quality_neutrality_argument):
        raise ValueError(
            f"FallbackRecord({record.name!r}).quality_neutrality_argument is required"
            " — this enforces the Tenet 6 quality-neutrality prong"
        )
    if _is_blank(record.expected_substitution):
        raise ValueError(
            f"FallbackRecord({record.name!r}).expected_substitution is required"
        )

    if record.retire_by is not None:
        # Local import: this module does not otherwise depend on datetime.
        from datetime import date

        # Round-trip through date so only the canonical YYYY-MM-DD spelling
        # is accepted (newer Pythons parse other ISO 8601 forms as well).
        try:
            canonical = date.fromisoformat(record.retire_by).isoformat()
        except (TypeError, ValueError):
            canonical = None
        if canonical != record.retire_by:
            raise ValueError(
                f"FallbackRecord({record.name!r}).retire_by must be an ISO 8601 "
                f"date (YYYY-MM-DD), got {record.retire_by!r}"
            )
        if _is_blank(record.retire_reason):
            raise ValueError(
                f"FallbackRecord({record.name!r}).retire_reason is required "
                "when retire_by is set"
            )

    # setdefault stores the record only when the name is free and otherwise
    # hands back the record that is already registered.
    with _LOCK:
        stored = _REGISTRY.setdefault(record.name, record)
    if stored != record:
        raise DuplicateFallbackRegistration(
            f"sanctioned fallback {record.name!r} already registered with different content"
        )


def fire_sanctioned_fallback(name: str, **context: Any) -> Any:
    """Log a FALLBACK_FIRED line for ``name`` and return its substitution.

    The emitted message is
    ``FALLBACK_FIRED name=<name> justification=<repr> <ctx>``, where ``<ctx>``
    is the context kwargs rendered as ``key=repr(value)`` pairs in key order,
    so operators can grep ``FALLBACK_FIRED`` and filter by ``name=``. The
    line is logged at the record's ``log_level`` and skipped when the module
    logger is not enabled for that level.

    Because ``name`` is a regular parameter, ``context`` cannot carry a key
    called ``name``.

    Returns:
        Whatever the record's ``substitute_fn`` returns when called with the
        context kwargs, or ``None`` when the record has no ``substitute_fn``
        (the caller then substitutes inline).

    Raises:
        KeyError: ``name`` is not registered (from get_sanctioned_fallback).
    """
    entry = get_sanctioned_fallback(name)
    level = entry.log_level
    if logger.isEnabledFor(level):
        # Keys are unique, so ordering by key matches ordering the items.
        rendered = [f"{key}={context[key]!r}" for key in sorted(context)]
        logger.log(
            level,
            "FALLBACK_FIRED name=%s justification=%r %s",
            name,
            entry.justification,
            " ".join(rendered),
        )
    substitute = entry.substitute_fn
    return None if substitute is None else substitute(**context)


def get_sanctioned_fallback(name: str) -> FallbackRecord:
    """Look up the record registered under ``name``.

    An unknown name is treated as a programmer error rather than a runtime
    fallback, so the lookup deliberately raises instead of logging and
    handing back a default.

    Raises:
        KeyError: No fallback is registered under ``name``.
    """
    with _LOCK:
        found = _REGISTRY.get(name)
    if found is not None:
        return found
    raise KeyError(f"no sanctioned fallback registered for name={name!r}")


def list_sanctioned_fallbacks() -> list[FallbackRecord]:
    """Return a snapshot list of every registered record.

    The list is a fresh copy taken under the registry lock, so the caller
    may mutate it without touching the registry.
    """
    with _LOCK:
        snapshot = list(_REGISTRY.values())
    return snapshot


def _reset_for_tests() -> None:
    """Test-only: clear the registry. Production code MUST NOT call this.

    The registry dict is emptied in place under the lock. This also drops
    the registrants this module adds at import time; nothing in this
    function re-registers them.
    """
    with _LOCK:
        _REGISTRY.clear()


# ---------------------------------------------------------------------------
# Initial registrants (introduced in Phase E.3)
#
# These three pass the three-prong test from spec-time inspection. Phase 1's
# inventory surfaced 11 additional category-(a) candidates; those need
# observability fixes (log.warning emission) before they qualify, and are
# registered inline by Phases 6/7/8 in the same commits as their site fixes.
# ---------------------------------------------------------------------------


# Sanctioned fallback: model alias -> canonical model id resolution.
# Registered with the FallbackRecord defaults (WARNING log level, no
# substitute_fn), so firing it only logs and returns None; the call site
# supplies the canonical id itself.
register_sanctioned_fallback(
    FallbackRecord(
        name="model_alias_resolver",
        justification=(
            "Resolve a non-canonical model alias (display name, legacy name) "
            "to its canonical model id via the model_profiles.json alias map."
        ),
        quality_neutrality_argument=(
            "Resolution returns the SAME canonical model. The provider, the "
            "weights, the cost-per-second, and every downstream parameter are "
            "identical to a direct canonical-id call. No generation byte changes."
        ),
        expected_substitution=(
            "canonical model_id string (e.g. 'kling-v3') in place of the "
            "input alias (e.g. 'kling')"
        ),
        introduced_in="Phase E.3",
    )
)


# Sanctioned fallback: cache miss -> reload from the canonical on-disk source.
# Log-only registrant (no substitute_fn); the reload itself is done by the
# caller. NOTE(review): the loaders named in the justification live outside
# this module — confirm each one actually calls fire_sanctioned_fallback.
register_sanctioned_fallback(
    FallbackRecord(
        name="cache_miss_canonical_source",
        justification=(
            "A cache lookup missed; fall back to the canonical on-disk source. "
            "Used by ref-descriptor cache, bible loader, model-profiles loader."
        ),
        quality_neutrality_argument=(
            "The canonical source IS the truth; the cache is an optimization. "
            "A miss returns the same data via a slower path. Subsequent calls "
            "(after cache repopulation) return the same value."
        ),
        expected_substitution=(
            "freshly-loaded value from the canonical disk file, identical to "
            "what a cache hit would have returned"
        ),
        introduced_in="Phase E.3",
    )
)


# Sanctioned fallback: missing cost metadata reported as 0.0 on telemetry-only
# surfaces. Log-only registrant (no substitute_fn), so the 0.0 is produced by
# the caller, not by this registry. NOTE(review): get_cost(allow_missing=True),
# read_cost_from_result_safe() and CostMissingError are defined elsewhere —
# verify those names still exist.
register_sanctioned_fallback(
    FallbackRecord(
        name="cost_unknown_telemetry_zero",
        justification=(
            "Cost metadata absent from a RunResult on a write-only telemetry "
            "surface (cost-display panes, ledger-aggregation paths that tolerate "
            "missing rows). The canonical raise path (CostMissingError) is the "
            "default; this fallback is the operator-approved escape valve for "
            "callers that explicitly opt in via get_cost(allow_missing=True) "
            "or read_cost_from_result_safe()."
        ),
        quality_neutrality_argument=(
            "The substitution (0.0) only flows into telemetry / display surfaces. "
            "It does NOT flow into budget caps, gate decisions, or any path that "
            "could affect generation bytes. Phase 1's inventory tags any caller "
            "that routes the substitution into a generation-affecting decision as "
            "category (b) — those callers must use the raising path."
        ),
        expected_substitution="0.0 (in place of the missing cost_usd reading)",
        introduced_in="Phase E.3",
    )
)


# Public surface for star-imports. _reset_for_tests and the private
# _REGISTRY / _LOCK globals are not exported. The register calls further
# down the file are import-time side effects and are unaffected by where
# this list is declared.
__all__ = [
    "FallbackRecord",
    "DuplicateFallbackRegistration",
    "register_sanctioned_fallback",
    "fire_sanctioned_fallback",
    "get_sanctioned_fallback",
    "list_sanctioned_fallbacks",
]


# ============================================================================
# Entries migrated from recoil/api/sanctioned_fallbacks.py (now deleted).
# ============================================================================

# Sanctioned fallback: one corrupt JSONL line in receipts.log is skipped.
# NOTE(review): the record text promises a FALLBACK_FIRED event in the BUS
# history, but fire_sanctioned_fallback in this module only emits a log
# record — presumably the call site or a log handler publishes the BUS
# event; confirm.
register_sanctioned_fallback(
    FallbackRecord(
        name="receipts_log_corrupt_line_skip",
        justification=(
            "A single corrupt JSONL line in receipts.log is dropped from "
            "the event stream rather than failing the whole /api/events request."
        ),
        quality_neutrality_argument=(
            "The user sees N-1 events instead of crashing; the dropped line "
            "surfaces as a fallback event. No generation bytes are affected — "
            "this is a read-only log-tailing path."
        ),
        expected_substitution=(
            "The corrupt line is skipped; remaining valid lines are yielded "
            "normally. A FALLBACK_FIRED event appears in the BUS history with "
            "the line preview and byte offset."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: an unparseable shot JSON file is left out of the beat
# list. Log-only registrant (no substitute_fn); the file path and error reach
# the FALLBACK_FIRED log line only if the caller passes them as context
# kwargs to fire_sanctioned_fallback.
register_sanctioned_fallback(
    FallbackRecord(
        name="shot_file_unreadable_drop",
        justification=(
            "A shot JSON file that fails json.load() is dropped from the "
            "beat list rather than crashing the list_beats() response."
        ),
        quality_neutrality_argument=(
            "Surface area shrinks by one; the dropped file path is in the "
            "fallback event payload for grep. No generation bytes are affected — "
            "this is a read-only adapter path."
        ),
        expected_substitution=(
            "The unreadable shot file is omitted from the beat list. The path "
            "and error are logged in the FALLBACK_FIRED event payload."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: mutations against fixture take ids that have no file
# on disk. NOTE(review): the argument text says this is "to be retired", but
# retire_by / retire_reason are left unset, which FallbackRecord documents as
# a permanent edge-case recovery — confirm whether a retirement date should
# be recorded here.
register_sanctioned_fallback(
    FallbackRecord(
        name="take_id_not_on_disk",
        justification=(
            "Phase 17 fixture proposals carry take ids (e.g. b5_t3) that "
            "do not exist on disk. Mutations against those ids return "
            "ok=true but emit severity=fallback so the events drawer "
            "shows the gap."
        ),
        quality_neutrality_argument=(
            "No disk write happens, no user-visible state changes; the "
            "take_id is recorded in the event payload. The user sees ok=true "
            "with a fallback event — no generation bytes are affected. "
            "To be retired when fixture proposals migrate to engine-resolved "
            "take ids (CP-N+)."
        ),
        expected_substitution=(
            "ok=true response with severity=fallback BUS event; the take_id "
            "and action are in the event payload."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: approve/reject/defer against a proposal id that is not
# pending. NOTE(review): the justification refers to a "retirement timeline",
# but retire_by / retire_reason are unset, so the record itself carries no
# retirement date — confirm intent.
register_sanctioned_fallback(
    FallbackRecord(
        name="proposal_id_not_pending",
        justification=(
            "approve/reject/defer against a proposal id that is not in "
            "the in-memory pending set. Same shape as take_id_not_on_disk "
            "and same retirement timeline."
        ),
        quality_neutrality_argument=(
            "No state change occurs; the event payload records the proposal_id "
            "and decision for operator triage. No generation bytes are affected."
        ),
        expected_substitution=(
            "ok=true response with severity=fallback BUS event; proposal_id "
            "and decision are in the event payload."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: the user chose Discard for a corrupt workspace blob.
# The bytes that change are stored workspace state, not a generated artifact
# (script, still, video, audio), which is what the quality-neutral prong in
# the module docstring covers. Log-only registrant (no substitute_fn); the
# overwrite itself is performed by the caller.
register_sanctioned_fallback(
    FallbackRecord(
        name="workspace_state_user_chose_discard",
        justification=(
            "User picked Discard in <SessionRestoreModal>. The corrupt "
            "workspace blob is overwritten by DEFAULT_WORKSPACE_STATE "
            "per user consent."
        ),
        quality_neutrality_argument=(
            "The byte change happened with explicit user choice — the user "
            "consented to discarding the corrupt state. No generation bytes "
            "are affected."
        ),
        expected_substitution=(
            "DEFAULT_WORKSPACE_STATE overwrites the corrupt blob in SQLite "
            "with the user's explicit consent."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: the user chose Report for a corrupt workspace blob.
# Log-only registrant (no substitute_fn); this registry does not write the
# side-channel JSONL file — presumably the caller does; verify against the
# call site.
register_sanctioned_fallback(
    FallbackRecord(
        name="workspace_state_user_chose_report",
        justification=(
            "User picked Report in <SessionRestoreModal>. The corrupt "
            "blob is appended to ~/.recoil/v2_corrupt_state_reports.jsonl "
            "for offline review; defaults are NOT persisted."
        ),
        quality_neutrality_argument=(
            "The user explicitly chose the inspection path. Defaults are not "
            "persisted — no generation bytes are affected."
        ),
        expected_substitution=(
            "The corrupt blob is appended to the side-channel JSONL file for "
            "offline inspection. No workspace state is overwritten."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: one project that fails to load is dropped from the
# /api/projects listing. Log-only registrant (no substitute_fn); the slug and
# exception type reach the FALLBACK_FIRED log line only if the caller passes
# them as context kwargs to fire_sanctioned_fallback.
register_sanctioned_fallback(
    FallbackRecord(
        name="project_load_failure_isolated",
        justification=(
            "A single project failed to load (Pydantic ValidationError, "
            "OSError in nested adapter calls, late LegacyProjectFormatError "
            "from list_episodes, KeyError in bible parsing, etc.). The bad "
            "slug is dropped from /api/projects rather than 500-ing the "
            "whole endpoint and killing the project picker."
        ),
        quality_neutrality_argument=(
            "Surface area shrinks by one (same shape as shot_file_unreadable_drop); "
            "the dropped slug + exception type are in the fallback event payload "
            "for grep. No generation bytes are affected — this is a read-only "
            "adapter path."
        ),
        expected_substitution=(
            "The bad project slug is omitted from the /api/projects response. "
            "The slug and exception type are logged in the FALLBACK_FIRED event."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: a take with no status signal is reported as 'queued'.
# No substitute_fn is registered, so the 'queued' value is supplied by the
# caller rather than returned by fire_sanctioned_fallback.
register_sanctioned_fallback(
    FallbackRecord(
        name="take_status_unsignaled_default_queued",
        justification=(
            "When a raw take dict has neither `rejected`, an error field, nor a "
            "file path, the canonical TakeStatus is 'queued'. Fired when the "
            "engine emits a take before write but after queue-claim."
        ),
        quality_neutrality_argument=(
            "The user sees 'queued' instead of an exception; the take_id is "
            "logged for operator review. No generation bytes are affected — "
            "this is a read-only status-derivation path."
        ),
        expected_substitution=(
            "'queued' TakeStatus in place of a missing explicit status signal."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)

# Sanctioned fallback: a take with no evaluation verdict is reported as
# 'pending'. No substitute_fn is registered, so the 'pending' value is
# supplied by the caller rather than returned by fire_sanctioned_fallback.
register_sanctioned_fallback(
    FallbackRecord(
        name="take_eval_state_unsignaled_default_pending",
        justification=(
            "When a raw take dict has no gate_1/gate_2 verdict, no `rejected` "
            "flag, and no disposition, the canonical EvalState is 'pending'."
        ),
        quality_neutrality_argument=(
            "The user sees 'pending'; the take_id is logged for operator review. "
            "No generation bytes are affected — this is a read-only "
            "status-derivation path."
        ),
        expected_substitution=(
            "'pending' EvalState in place of a missing explicit eval verdict."
        ),
        introduced_in="api/sanctioned_fallbacks (consolidated Build A Phase 5, 2026-05-09)",
    )
)


# step_runner best-effort store-update and cleanup registrants

# Sanctioned fallback: a failed ExecutionStore update on an already-failed
# outer step is skipped. log_level and substitute_fn are spelled out below
# but equal the FallbackRecord defaults (logging.WARNING / None), so firing
# this only logs and returns None.
register_sanctioned_fallback(
    FallbackRecord(
        name="step_runner_outer_failure_store_update_skip",
        justification=(
            "When an outer step has already failed (keyframe, video, previz), the "
            "inner ExecutionStore update that would record the failure is best-effort. "
            "If the update fails too, the StepResult is still returned with the "
            "correct failure shape — the missing telemetry surfaces as a "
            "FALLBACK_FIRED event."
        ),
        quality_neutrality_argument=(
            "No artifact bytes are produced or modified — this fallback only "
            "skips a telemetry-write on an already-failed path. The take is "
            "already marked failed via the StepResult; the missed update is "
            "bookkeeping, not generation."
        ),
        expected_substitution="Telemetry write skipped; FALLBACK_FIRED event in BUS.",
        log_level=logging.WARNING,
        substitute_fn=None,
        introduced_in="step_runner Phase 17 (Build C, 2026-05-09)",
    )
)

# Sanctioned fallback: a failed per-shot ExecutionStore update inside a batch
# loop is skipped so the remaining shots continue. log_level and
# substitute_fn restate the FallbackRecord defaults (logging.WARNING / None).
register_sanctioned_fallback(
    FallbackRecord(
        name="step_runner_batch_update_skip",
        justification=(
            "Per-shot ExecutionStore update inside a batch loop is best-effort. "
            "Other shots in the batch must continue regardless of one update "
            "failing. The shot-level outcome is conveyed via the returned "
            "StepResult; the missed update surfaces as a FALLBACK_FIRED event."
        ),
        quality_neutrality_argument=(
            "Batch-level continuation requires per-shot resilience; skipping a "
            "telemetry write does not change generation output. The take's "
            "actual file path / status is recorded via the canonical write "
            "earlier in the batch step."
        ),
        expected_substitution="Per-shot update skipped; loop continues; FALLBACK_FIRED.",
        log_level=logging.WARNING,
        substitute_fn=None,
        introduced_in="step_runner Phase 17 (Build C, 2026-05-09)",
    )
)

# Sanctioned fallback: a cleanup failure inside an outer exception handler is
# skipped. log_level and substitute_fn restate the FallbackRecord defaults
# (logging.WARNING / None).
register_sanctioned_fallback(
    FallbackRecord(
        name="step_runner_nested_cleanup_skip",
        justification=(
            "Nested cleanup (rm of temp files, log close) inside an outer "
            "exception handler is best-effort. The outer exception is already "
            "logged + propagated; the inner cleanup failure does not change "
            "control flow or generation output."
        ),
        quality_neutrality_argument=(
            "Cleanup paths run AFTER all generation has terminated (success or "
            "failure). They cannot affect output bytes. Skipping a temp-file "
            "rm leaves a stale file on disk for the next sweep but does not "
            "alter any take or sidecar."
        ),
        expected_substitution="Stale temp file may persist; FALLBACK_FIRED event.",
        log_level=logging.WARNING,
        substitute_fn=None,
        introduced_in="step_runner Phase 17 (Build C, 2026-05-09)",
    )
)

# Sanctioned fallback: a failed supplementary sidecar write in step_runner is
# skipped. log_level and substitute_fn restate the FallbackRecord defaults
# (logging.WARNING / None). NOTE(review): the sidecar modules named in the
# argument live outside this file — verify the paths are still current.
register_sanctioned_fallback(
    FallbackRecord(
        name="step_runner_sidecar_best_effort_skip",
        justification=(
            "Sidecar write best-effort wrapper inside step_runner. The canonical "
            "sidecar write happens via the dispatch path; this site is a "
            "supplementary write that may fail without affecting the dispatch "
            "outcome."
        ),
        quality_neutrality_argument=(
            "The canonical sidecar (workspace/sidecar.py + core/sidecar_writer.py) "
            "is unmodified by this skip. The supplementary write is purely "
            "advisory — the take's primary sidecar still records its provenance."
        ),
        expected_substitution="Supplementary sidecar field not written; FALLBACK_FIRED.",
        log_level=logging.WARNING,
        substitute_fn=None,
        introduced_in="step_runner Phase 17 (Build C, 2026-05-09)",
    )
)

# Sanctioned fallback: a failed post-step finalization signal is skipped.
# log_level and substitute_fn restate the FallbackRecord defaults
# (logging.WARNING / None).
register_sanctioned_fallback(
    FallbackRecord(
        name="step_runner_post_step_finalize_skip",
        justification=(
            "Post-step finalization tasks (mark-as-complete signaling, status "
            "broadcast) are best-effort. A failure in finalization does not "
            "invalidate the step's actual output; the next polling cycle "
            "discovers the completed state from disk."
        ),
        quality_neutrality_argument=(
            "Finalization signals are convenience — the on-disk state IS the "
            "source of truth. Skipping a finalize does not change any artifact."
        ),
        expected_substitution="Finalization signal skipped; next poll discovers state.",
        log_level=logging.WARNING,
        substitute_fn=None,
        introduced_in="step_runner Phase 17 (Build C, 2026-05-09)",
    )
)

# Recents-feed registrant. It sits directly after the step_runner registrants
# but is not one of them: its introduced_in is a separate, later phase. Uses
# the FallbackRecord defaults (WARNING log level, no substitute_fn), so the
# null id fields are produced by the caller.
register_sanctioned_fallback(
    FallbackRecord(
        name="recent_id_derivation_failed",
        justification=(
            "A file in the project output directory could not have beat_id derived "
            "from its path (no shot pattern, no sidecar, no recognizable folder). "
            "The file is still listed in the recents feed with beat_id=null; the "
            "frontend disables its row. Telemetry event fires for operator triage."
        ),
        quality_neutrality_argument=(
            "This is a read-only recents-feed path — no generation bytes are "
            "produced or modified. The file's path, mtime, and type are still "
            "returned correctly; only the id-derivation fields are null."
        ),
        expected_substitution=(
            "beat_id=null, episode_id=null, take_id=null for the affected row. "
            "All other row fields (name, path, mtime, type, status) are populated."
        ),
        introduced_in="Phase 2 (console-v2-per-project-mode, 2026-05-15)",
    )
)
