"""Canonical failure-mode taxonomy + classifier.

Phase C of the engine-architectural-audit fix sprint (2026-04-30) collapsed:
- 2 parallel enums (FailureMode in core.critic, FailureCategory in
  pipeline.orchestrator.production_types) into ONE canonical taxonomy
  (FailureMode is canonical; FailureCategory becomes a derived view).
- 3 classifier functions (run_shot._extract_failure_mode,
  strategy_registry.detect_failure_mode, retry_dispatcher.classify_failure)
  into ONE canonical entry point (classify_failure here).
- 5 disagreeing transient-pattern lists into ONE canonical pattern set
  (TRANSIENT_PATTERN_STRINGS + TRANSIENT_HTTP_CODES below).

Tenet 6 (Errors Must Be Visible) — codified 2026-04-30 in architectural-law.md:
The classifier ESCALATES on unknown error shapes. It does NOT silently
default to TRANSIENT (would retry forever) or PERMANENT (would give up
silently). UnknownFailureEscalation surfaces the unclassifiable input with
full context.

Frozen contract surface:
- FailureMode enum string values are stable. Adding members is safe;
  renaming or removing is breaking. Persisted in critics' Dimension records.
- classify_failure() signature: (*, error_text, gate_verdict, http_status,
  escalate_unknown, caller) -> tuple[FailureMode, float]. All keyword-only,
  all params optional, returns (mode, confidence in [0.0, 1.0]).
- failure_category_for() signature: (mode: FailureMode) -> FailureCategory.
  Total over all FailureMode values except NONE (raises ValueError) and
  UNKNOWN (raises UnknownFailureEscalation per Tenet 6).
"""

from __future__ import annotations

import logging
from typing import Optional

# Re-export the canonical enum from its existing home. core.critic stays
# the SSOT of FailureMode string values to avoid forcing critic callers
# (5 critic files + run_shot + strategy_registry + workspace/mcp_server)
# to migrate imports as part of Phase C.
from recoil.core.critic import FailureMode

# FailureCategory stays a derived view; we re-export it here so callers
# import the canonical taxonomy from one place.
from recoil.pipeline.orchestrator.production_types import FailureCategory

logger = logging.getLogger(__name__)


# ── Canonical pattern sets ──────────────────────────────────────────────
# Union of:
# - retry_dispatcher._TRANSIENT_PATTERNS (8 strings)
# - strategy_registry inline list (6 strings — adds 504, missing 500/429/ECONNRESET)
# - elevenlabs RETRYABLE_HTTP (5 codes)
# - sync_so RETRYABLE_HTTP (5 codes; identical to elevenlabs)
# - production_loop._classify_pass_error (re-imports retry_dispatcher list)
#
# Deliberate omissions: NONE.
#
# Note on 429: "429" is kept in TRANSIENT_PATTERN_STRINGS because
# retry_dispatcher already classified "429" string matches as TRANSIENT.
# Provider adapters (elevenlabs, sync_so) treat HTTP 429 as a fail-fast
# RateLimitError, so 429 is deliberately NOT in TRANSIENT_HTTP_CODES;
# classify_failure() instead handles http_status == 429 with its own
# explicit branch (TRANSIENT at 0.90 confidence). This dual disposition
# matches pre-Phase-C behavior and is verified by Phase 12 fixtures.

# Substrings that mark an error text as TRANSIENT. classify_failure()
# lower-cases each entry before the substring test, which is why the
# mixed-case "ECONNRESET" entry still matches lower-cased error text.
TRANSIENT_PATTERN_STRINGS: tuple[str, ...] = (
    "429",
    "rate limit",
    "500",
    "502",
    "503",
    "504",
    "timeout",
    "connection",
    "ECONNRESET",
)

# HTTP status codes that classify_failure() maps to TRANSIENT by exact match
# on http_status. 429 is intentionally absent (handled by a separate branch).
# NOTE(review): 501 (Not Implemented) is listed as transient here — confirm
# this matches the provider RETRYABLE_HTTP sets this constant was merged from.
TRANSIENT_HTTP_CODES: frozenset[int] = frozenset({500, 501, 502, 503, 504})

# Substrings of the lower-cased error text that map to
# CONTENT_FILTER_HARD_BLOCK. Entries must be lower-case: they are compared
# as-is against lower-cased text.
CONTENT_FILTER_PATTERNS: tuple[str, ...] = (
    "content policy",
    "content filter",
    "safety",
    "nsfw",
    "moderation",
    "rejected",
    "blocked",
    "policy",
    "refused",
)

# Substrings that map to COST_OVERRUN. Checked first among the text
# patterns, so a budget match wins over every other text category.
BUDGET_PATTERNS: tuple[str, ...] = (
    "budget",
    "insufficient",
    "balance",
    "402",
)

# Substrings that map to PROMPT_DURATION_MISMATCH (request/schema
# validation style errors).
SCHEMA_PATTERNS: tuple[str, ...] = (
    "422",
    "input should be",
    "unprocessable",
    "validation error",
)

# Substrings that map to IDENTITY_DRIFT.
IDENTITY_PATTERNS: tuple[str, ...] = (
    "identity",
    "drift",
    "face",
    "different person",
    "wrong character",
)

# Substrings that map to WARDROBE_MISMATCH. Checked before
# IDENTITY_PATTERNS in classify_failure().
WARDROBE_PATTERNS: tuple[str, ...] = (
    "wardrobe",
    "costume",
    "clothing",
    "outfit",
    "phase",
)

# Substrings that map to GATE_MECHANICAL. Checked last among the text
# patterns. These are plain substring tests, so short entries such as
# "hand" also match inside longer words.
MECHANICAL_PATTERNS: tuple[str, ...] = (
    "artifact",
    "finger",
    "limb",
    "hand",
    "anatomy",
    "merge",
    "distort",
)


# ── Tenet 6: escalation exception ───────────────────────────────────────
# Canonical home: recoil/core/exceptions.py::UnknownFailureEscalation.
# Re-exported here for one-cycle backward compatibility (Phase E.5).

from recoil.core.exceptions import UnknownFailureEscalation  # noqa: F401, E402  # DEPRECATED: Phase E.5 migration


# ── Canonical classifier ────────────────────────────────────────────────

def classify_failure(
    *,
    error_text: Optional[str] = None,
    gate_verdict: Optional[object] = None,
    http_status: Optional[int] = None,
    escalate_unknown: bool = True,
    caller: Optional[str] = None,
) -> tuple[FailureMode, float]:
    """Canonical failure classifier. Returns (mode, confidence in [0.0, 1.0]).

    Inputs are keyword-only and all optional; pass whatever the caller has.
    Rules are evaluated in tiers and the first match wins:

    0. Exact match on ``http_status``.
    1. Substring match of the pattern tuples against lower-cased
       ``error_text`` (budget, content filter, schema, transient, wardrobe,
       identity, mechanical — in that order).
    2. Introspection of ``gate_verdict`` (``details["failure_category"]``,
       ``gate_name``, ``deferred``).
    3. Nothing matched: escalate or return UNKNOWN.

    Args:
        error_text: Free-form error string from a provider/StepResult.
        gate_verdict: Optional gate verdict object with .gate_name,
            .details, .deferred attributes (legacy GateVerdict shape).
            Missing attributes are tolerated.
        http_status: Optional HTTP status code (int, 100-599).
        escalate_unknown: If True (default), raises UnknownFailureEscalation
            when input cannot be classified. If False, returns
            (FailureMode.UNKNOWN, 0.0). Default True per Tenet 6.
        caller: Optional string identifying the caller; used only in the
            escalation log line and exception.

    Returns:
        Tuple of (FailureMode, confidence). confidence ∈ [0.0, 1.0].

    Raises:
        UnknownFailureEscalation: If escalate_unknown=True and no
            classification rule matched.
    """
    error_lower = (error_text or "").lower()

    # ── Tier 0: HTTP-code exact match ───────────────────────────────────
    if http_status is not None:
        if http_status in TRANSIENT_HTTP_CODES:
            return (FailureMode.TRANSIENT, 0.95)
        if http_status == 429:
            return (FailureMode.TRANSIENT, 0.90)
        if http_status == 422:
            return (FailureMode.PROMPT_DURATION_MISMATCH, 0.90)
        # NOTE(review): 401 (Unauthorized) is grouped with 402 (Payment
        # Required) — presumably because some providers report exhausted
        # quota as 401; confirm, since plain auth failures land here too.
        if http_status in {401, 402}:
            return (FailureMode.COST_OVERRUN, 0.95)

    # ── Tier 1: string patterns on error_text ───────────────────────────
    if error_lower:
        # Order is significant: the first pattern group with a hit wins.
        text_rules = (
            (BUDGET_PATTERNS, FailureMode.COST_OVERRUN, 0.95),
            (CONTENT_FILTER_PATTERNS, FailureMode.CONTENT_FILTER_HARD_BLOCK, 0.95),
            (SCHEMA_PATTERNS, FailureMode.PROMPT_DURATION_MISMATCH, 0.85),
            (TRANSIENT_PATTERN_STRINGS, FailureMode.TRANSIENT, 0.90),
            (WARDROBE_PATTERNS, FailureMode.WARDROBE_MISMATCH, 0.80),
            (IDENTITY_PATTERNS, FailureMode.IDENTITY_DRIFT, 0.80),
            (MECHANICAL_PATTERNS, FailureMode.GATE_MECHANICAL, 0.75),
        )
        for patterns, mode, confidence in text_rules:
            # Patterns are lower-cased at match time so mixed-case entries
            # (e.g. "ECONNRESET") still hit the lower-cased error text.
            if any(pat.lower() in error_lower for pat in patterns):
                return (mode, confidence)

    # ── Tier 2: gate_verdict introspection ──────────────────────────────
    if gate_verdict is not None:
        gn = (getattr(gate_verdict, "gate_name", None) or "").lower()
        details = getattr(gate_verdict, "details", None) or {}
        raw_cat = (
            details.get("failure_category", "")
            if isinstance(details, dict)
            else ""
        )
        if raw_cat:
            # An exact FailureMode value in the verdict is trusted directly.
            # NOTE(review): this can also yield NONE/UNKNOWN members if the
            # verdict carries those values — confirm that is intended.
            try:
                return (FailureMode(raw_cat), 0.85)
            except ValueError:
                pass
        # The substring checks below need a string. A failure_category that
        # is present but None / non-string previously raised TypeError from
        # the `in` tests; treat it as "no category" so classification falls
        # through to gate_name and, ultimately, to the Tier 3 escalation.
        cat = raw_cat if isinstance(raw_cat, str) else ""
        if "identity" in gn or "identity" in cat:
            return (FailureMode.IDENTITY_DRIFT, 0.75)
        if "anatomy" in gn or "face_merge" in cat:
            return (FailureMode.ANATOMY_FACE_MERGE, 0.75)
        if "content_filter" in cat or "safety" in gn:
            return (FailureMode.CONTENT_FILTER_HARD_BLOCK, 0.85)
        if "wardrobe" in gn or "wardrobe" in cat:
            return (FailureMode.WARDROBE_MISMATCH, 0.75)
        if getattr(gate_verdict, "deferred", False):
            return (FailureMode.END_FRAME_DRIFT, 0.60)
        if "mechanical" in gn:
            return (FailureMode.GATE_MECHANICAL, 0.70)

    # ── Tier 3: nothing matched ─────────────────────────────────────────
    if escalate_unknown:
        # Log before raising so the unclassifiable input is visible even if
        # a caller catches the escalation.
        logger.warning(
            "FALLBACK_FIRED unknown_failure_classification "
            "caller=%s error_text=%r http_status=%r",
            caller, error_text, http_status,
        )
        raise UnknownFailureEscalation(
            error_text=error_text,
            gate_verdict=gate_verdict,
            http_status=http_status,
            caller=caller,
        )
    return (FailureMode.UNKNOWN, 0.0)


# ── FailureMode → FailureCategory coarsening ────────────────────────────

# Lookup table behind failure_category_for(). FailureMode.NONE and
# FailureMode.UNKNOWN are intentionally absent: failure_category_for()
# rejects both before consulting this table. Any other FailureMode missing
# from this table makes failure_category_for() raise ValueError.
_MODE_TO_CATEGORY: dict[FailureMode, FailureCategory] = {
    # Transient / provider-side
    FailureMode.TRANSIENT: FailureCategory.TRANSIENT,
    # Content filtering
    FailureMode.CONTENT_FILTER_HARD_BLOCK: FailureCategory.CONTENT_FILTER,
    FailureMode.SAFETY_SOFTENED: FailureCategory.CONTENT_FILTER,
    # Mechanical / anatomy gates
    FailureMode.GATE_MECHANICAL: FailureCategory.GATE_MECHANICAL,
    FailureMode.ANATOMY_FACE_MERGE: FailureCategory.GATE_MECHANICAL,
    FailureMode.ANATOMY_LIMB_MISCOUNT: FailureCategory.GATE_MECHANICAL,
    # Identity gates
    FailureMode.IDENTITY_DRIFT: FailureCategory.GATE_IDENTITY,
    FailureMode.REF_BLEED: FailureCategory.GATE_IDENTITY,
    # Wardrobe gate
    FailureMode.WARDROBE_MISMATCH: FailureCategory.GATE_WARDROBE,
    # Modes coarsened to the video-drift category
    FailureMode.MOTION_FAILURE: FailureCategory.GATE_VIDEO_DRIFT,
    FailureMode.END_FRAME_DRIFT: FailureCategory.GATE_VIDEO_DRIFT,
    FailureMode.CUTS_TOO_SOFT: FailureCategory.GATE_VIDEO_DRIFT,
    FailureMode.AUDIO_SYNC_DRIFT: FailureCategory.GATE_VIDEO_DRIFT,
    # One-to-one categories
    FailureMode.PROMPT_DURATION_MISMATCH: FailureCategory.PROMPT_DURATION_MISMATCH,
    FailureMode.COST_OVERRUN: FailureCategory.BUDGET,
    # Everything below coarsens to PERMANENT
    FailureMode.BACKGROUND_CONTAMINATION: FailureCategory.PERMANENT,
    FailureMode.COMPOSITION_WRONG: FailureCategory.PERMANENT,
    FailureMode.STYLE_DRIFT: FailureCategory.PERMANENT,
    FailureMode.LIGHTING_MISMATCH: FailureCategory.PERMANENT,
    FailureMode.GRID_INFLUENCE: FailureCategory.PERMANENT,
    FailureMode.COVERAGE_GEOMETRY_BROKEN: FailureCategory.PERMANENT,
}


def failure_category_for(mode: FailureMode) -> FailureCategory:
    """Map a fine-grained FailureMode onto its coarse FailureCategory.

    The coarse category is what retry-policy lookup keys on. Two members
    are rejected up front rather than mapped:

    - FailureMode.NONE raises ValueError: there is no failure to coarsen,
      so passing it is a caller error.
    - FailureMode.UNKNOWN raises UnknownFailureEscalation (Tenet 6).

    Any other member without an entry in _MODE_TO_CATEGORY also raises
    ValueError, so a newly added FailureMode fails loudly at runtime until
    its mapping is added. The Phase 12 hard gate verifies the mapping is
    exhaustive across all FailureMode members.
    """
    # Guard clauses for the two members that never have a category.
    if mode is FailureMode.NONE:
        raise ValueError("failure_category_for(NONE): NONE is not a failure")
    if mode is FailureMode.UNKNOWN:
        raise UnknownFailureEscalation(
            caller="failure_category_for",
            error_text="FailureMode.UNKNOWN passed to coarsening function",
        )

    if mode in _MODE_TO_CATEGORY:
        return _MODE_TO_CATEGORY[mode]

    # Unmapped member: tell the maintainer exactly where to add it.
    hint = "Add it to _MODE_TO_CATEGORY in pipeline.core.failure_mode."
    raise ValueError(f"failure_category_for: no mapping for {mode!r}. {hint}")


# Public API of this module: the two taxonomy enums (re-exported), the
# canonical pattern sets, the escalation exception (re-exported), and the
# two entry-point functions. _MODE_TO_CATEGORY is private and not exported.
__all__ = [
    "FailureMode",
    "FailureCategory",
    "TRANSIENT_PATTERN_STRINGS",
    "TRANSIENT_HTTP_CODES",
    "CONTENT_FILTER_PATTERNS",
    "BUDGET_PATTERNS",
    "SCHEMA_PATTERNS",
    "IDENTITY_PATTERNS",
    "WARDROBE_PATTERNS",
    "MECHANICAL_PATTERNS",
    "UnknownFailureEscalation",
    "classify_failure",
    "failure_category_for",
]
