"""EvalRunnerBase — shared run() logic for all eval modality runners.

Concrete subclasses (EvalImageRunner, EvalVideoRunner, EvalAudioRunner) inherit
this class and only override the ``modality`` class attribute. The entire
``run()`` body lives here.

Payload schema (common to all eval modality runners):
    {
        "shot_id": str (required, for receipt provenance + RunResult.id),
        "artifact_path": str | Path (required, the artifact to score),
        "rubric": str (required, the scoring prompt the judge applies),
        "judge_id": str (optional, default = self.modality),
        "ref_paths": Optional[list[str | Path]] (passed through to
            EvalContext.metadata["ref_paths"] for judges that consume them),
        "scene_takes": Optional[list] (reserved; passed through as
            EvalContext.scene_takes),
        "target_take": Optional (passed through unchanged as
            EvalContext.target_take),
        "prompt": Optional[str] (the original generation prompt that
            produced the artifact; passed through as EvalContext.prompt
            for prompt-adherence scoring),
        "_transport": Optional[Callable] (test-only injection for the
            underlying provider adapter — threaded into the judge via
            EvalContext.metadata["_transport"]).
    }

Class name + zero-arg constructor preserved per CP-4 ModalityRunner contract
and CP-8 AudioRunner shape. RunResult.id is built via
:func:`recoil.pipeline.core.runners._shared.make_run_result_id` (nanosecond suffix)
so two evals on the same shot do not collide.

Failure metadata uses :func:`recoil.pipeline.core.runners._shared._failure_metadata_eval`
— six keys: ``final_state``, ``eval_score``, ``eval_reasoning``, ``judge_id``,
``model_used``, ``eval_cost_usd``.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from recoil.pipeline.core.eval import EvalContext, EvalResult, get_eval_node
from recoil.pipeline.core.registry import RunResult
from recoil.pipeline.core.runners._shared import (
    _failure_metadata_eval,
    make_run_result_id,
)


def _make_failure(
    modality: str,
    shot_id: str | None,
    error: str,
    *,
    exc: Exception | None = None,
    judge_id: str | None = None,
    judge: Any = None,
) -> RunResult:
    """Build the ``RunResult`` that reports a failed eval run.

    The metadata starts from ``_failure_metadata_eval()`` and is then
    enriched with whatever context the caller supplies.

    Args:
        modality: Modality string recorded on the result and used for its id.
        shot_id: Shot identifier used for the result id; may be ``None``.
        error: Human-readable failure message.
        exc: Optional exception; its class name is stored as ``error_class``.
        judge_id: Optional judge identifier stored as ``judge_id``.
        judge: Optional judge object; its ``model_used`` attribute (``None``
            when absent) is stored as ``model_used``.

    Returns:
        A ``RunResult`` with ``success=False``.
    """
    failure_md = _failure_metadata_eval()

    # Only overwrite the baseline keys for context that was actually given.
    if exc is not None:
        failure_md["error_class"] = type(exc).__name__
    if judge_id is not None:
        failure_md["judge_id"] = judge_id
    if judge is not None:
        failure_md["model_used"] = getattr(judge, "model_used", None)

    result_id = make_run_result_id(shot_id, modality)
    return RunResult(
        id=result_id,
        modality=modality,
        success=False,
        error=error,
        metadata=failure_md,
    )


class EvalRunnerBase:
    """Base class for eval modality runners.

    Subclass and set ``modality`` to the MODALITY_EVAL_*_V1 constant.
    No other overrides are needed for standard eval behavior.

    ``run()`` reports handled failures (missing payload keys, unknown judge,
    invalid ``ref_paths``, context construction errors, judge exceptions,
    malformed judge results) as a ``RunResult`` with ``success=False``.
    """

    modality: str  # must be set by subclass

    def __init__(self) -> None:
        # Zero-arg by design — runner has no per-process state. Matches
        # CP-8 AudioRunner shape; payload-driven via judge_id lookup.
        pass

    def run(self, payload: dict) -> RunResult:
        """Score one artifact with the judge selected by ``payload``.

        Args:
            payload: Mapping read via ``.get``. Required (truthy) keys:
                ``shot_id``, ``artifact_path``, ``rubric``. Optional keys:
                ``judge_id`` (falls back to ``self.modality``), ``prompt``
                (``None`` becomes ``""``; other non-str values are
                ``str()``-coerced), ``_transport`` and ``ref_paths``
                (forwarded through ``EvalContext.metadata``),
                ``target_take`` and ``scene_takes`` (forwarded to
                ``EvalContext``). ``ref_paths`` may be an iterable of
                ``str``/``Path`` or a single ``str``/``Path``.

        Returns:
            A ``RunResult``. On success its metadata carries the judge's
            score, reasoning, judge id, model, cost and judge metadata; on
            a handled failure it is built by ``_make_failure``.
        """
        shot_id = payload.get("shot_id")
        artifact_path_raw = payload.get("artifact_path")
        rubric = payload.get("rubric")
        judge_id = payload.get("judge_id") or self.modality

        if not shot_id or not artifact_path_raw or not rubric:
            # The rubric text itself is elided from the message; only its
            # presence is reported.
            return _make_failure(
                self.modality,
                shot_id,
                f"{type(self).__name__} payload missing required keys: "
                f"shot_id={shot_id!r}, "
                f"artifact_path={artifact_path_raw!r}, "
                f"rubric={'...' if rubric else None!r}",
            )

        try:
            judge = get_eval_node(judge_id)
        except KeyError as e:
            return _make_failure(
                self.modality,
                shot_id,
                f"unknown judge_id {judge_id!r}: {e}",
                exc=e,
                judge_id=judge_id,
            )

        prompt = payload.get("prompt")
        if not isinstance(prompt, str):
            prompt = "" if prompt is None else str(prompt)

        ctx_metadata: dict[str, Any] = {}
        transport = payload.get("_transport")
        if transport is not None:
            ctx_metadata["_transport"] = transport

        ref_paths_raw = payload.get("ref_paths")
        if ref_paths_raw:
            # A lone path must not be iterated character by character.
            if isinstance(ref_paths_raw, (str, Path)):
                ref_paths_raw = [ref_paths_raw]
            # Path() raises TypeError for non-path-like entries, and
            # iterating a non-iterable raises TypeError too. Report both as
            # a failure so the runner still returns a RunResult.
            try:
                ctx_metadata["ref_paths"] = [
                    p if isinstance(p, Path) else Path(p) for p in ref_paths_raw
                ]
            except TypeError as e:
                return _make_failure(
                    self.modality,
                    shot_id,
                    f"invalid ref_paths: {type(e).__name__}: {e}",
                    exc=e,
                    judge_id=judge_id,
                    judge=judge,
                )

        try:
            ctx = EvalContext(
                target_artifact_path=Path(artifact_path_raw),
                target_take=payload.get("target_take"),
                prompt=prompt,
                rubric=rubric,
                judge_id=judge_id,
                metadata=ctx_metadata,
                scene_takes=list(payload.get("scene_takes") or []),
            )
        except (TypeError, ValueError) as e:
            return _make_failure(
                self.modality,
                shot_id,
                f"EvalContext construction failed: {type(e).__name__}: {e}",
                exc=e,
                judge_id=judge_id,
                judge=judge,
            )

        try:
            result: EvalResult = judge.evaluate(ctx)
        except Exception as e:  # noqa: BLE001
            # Deliberately broad: any judge error becomes a failure result.
            return _make_failure(
                self.modality,
                shot_id,
                f"{type(e).__name__}: {e}",
                exc=e,
                judge_id=judge_id,
                judge=judge,
            )

        # Defend against malformed EvalResult — float/dict coercion can raise
        # if the judge returns a non-conforming object. Per runner contract,
        # ALWAYS return a RunResult; never let the judge crash the runner.
        try:
            return RunResult(
                id=make_run_result_id(shot_id, self.modality),
                modality=self.modality,
                output_path=None,  # eval produces no artifact, only a verdict
                output_url=None,
                success=True,
                error=None,
                metadata={
                    "final_state": "succeeded",
                    "eval_score": float(result.score),
                    "eval_reasoning": result.reasoning,
                    "judge_id": result.judge_id,
                    "model_used": result.model_used,
                    "eval_cost_usd": float(result.cost_usd),
                    "judge_metadata": dict(result.metadata),
                },
            )
        except (TypeError, ValueError, AttributeError) as e:
            return _make_failure(
                self.modality,
                shot_id,
                f"malformed EvalResult from judge: {type(e).__name__}: {e}",
                exc=e,
                judge_id=judge_id,
                judge=judge,
            )


# Public API: only the base class is exported; ``_make_failure`` is a
# module-private helper and is intentionally left out.
__all__ = ["EvalRunnerBase"]
