"""CP-9 Phase 7 — LegacyFlashCriticEvalNode adapter tests.

Coverage:
  - PASS outcome → score=1.0
  - FAIL outcome → score=0.0
  - ERROR outcome → score=0.0
  - Multiple dimensions joined into reasoning string
  - EvalResult shape (score, reasoning, judge_id, model_used, cost_usd, metadata)
  - EvalNode Protocol conformance (judge_id + model_used + evaluate present)
  - Custom judge_id parameter respected
  - Default judge_id is "legacy_flash_critic_v1"
  - model_used falls back to "unknown" when wrapped critic exposes no model_id
  - model_used echoed from wrapped critic.model_id when present
  - cost_usd is always 0.0 (CriticResult does not expose reliable cost)
  - metadata captures outcome.value + dimension names

The wrapped CriticLoop is a unittest.mock.Mock — never invokes a real
Gemini Flash critic. Per Phase 1 audit § 12f items 2-3 (LOCKED).
"""

import sys
import pathlib
from unittest.mock import Mock

sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent.parent.parent))
from recoil.core.paths import ensure_pipeline_importable  # noqa: E402

ensure_pipeline_importable()

from recoil.pipeline.core.eval import EvalContext, EvalResult, EvalNode  # noqa: E402
from recoil.core.critic import (  # noqa: E402
    LegacyFlashCriticEvalNode,
    CriticResult,
    Dimension,
    Outcome,
    Severity,
)


# ── Helpers ──────────────────────────────────────────────────────────


def _make_ctx(tmp_path: pathlib.Path) -> EvalContext:
    """Return an EvalContext whose artifact is a stub file under ``tmp_path``.

    The stub is named ``frame.png`` and holds only a few leading PNG
    signature bytes, so the context points at a file that really exists.
    """
    stub_png = tmp_path.joinpath("frame.png")
    stub_png.write_bytes(b"\x89PNG\r\n")
    ctx_fields = {
        "target_artifact_path": stub_png,
        "target_take": None,
        "prompt": "hero stands in alley",
        "rubric": "identity locked, no extra limbs",
        "judge_id": "caller_id",
        "metadata": {"shot_id": "EP001_SH02"},
    }
    return EvalContext(**ctx_fields)


def _mock_critic(
    *,
    outcome: Outcome,
    dimensions: list,
    model_id: str | None = "gemini-2.0-flash",
) -> Mock:
    """Return a Mock standing in for the wrapped CriticLoop.

    The mock's ``run`` is stubbed to return a ``(path, CriticResult)`` pair,
    where the result carries the supplied ``outcome`` and ``dimensions``.

    Pass ``model_id=None`` to simulate a critic that exposes no ``model_id``
    attribute at all.
    """
    verdict = CriticResult(
        critic_name="mock_critic",
        outcome=outcome,
        dimensions=dimensions,
    )
    stub = Mock()
    stub.run.return_value = ("/tmp/frame.png", verdict)
    if model_id is None:
        # A Mock fabricates any attribute on first access; deleting it makes
        # later lookups raise AttributeError so getattr defaults apply.
        del stub.model_id
    else:
        stub.model_id = model_id
    return stub


# ── PASS / FAIL / ERROR mapping ──────────────────────────────────────


def test_pass_outcome_maps_to_score_1_0(tmp_path) -> None:
    """A PASS outcome from the wrapped critic yields an EvalResult scoring 1.0."""
    passing_dim = Dimension(
        name="identity",
        severity=Severity.HARD,
        passed=True,
        message="identity locked",
    )
    stub = _mock_critic(outcome=Outcome.PASS, dimensions=[passing_dim])
    node = LegacyFlashCriticEvalNode(stub)
    verdict = node.evaluate(_make_ctx(tmp_path))
    assert verdict.score == 1.0
    assert isinstance(verdict, EvalResult)


def test_fail_outcome_maps_to_score_0_0(tmp_path) -> None:
    """A FAIL outcome from the wrapped critic yields a score of 0.0."""
    failing_dim = Dimension(
        name="anatomy",
        severity=Severity.HARD,
        passed=False,
        message="extra arm detected",
    )
    stub = _mock_critic(outcome=Outcome.FAIL, dimensions=[failing_dim])
    node = LegacyFlashCriticEvalNode(stub)
    verdict = node.evaluate(_make_ctx(tmp_path))
    assert verdict.score == 0.0


def test_error_outcome_maps_to_score_0_0(tmp_path) -> None:
    """An ERROR outcome is scored 0.0 — it must never be treated as a pass."""
    error_dim = Dimension(
        name="SYSTEM_ERROR",
        severity=Severity.HARD,
        passed=False,
        message="vision API down",
    )
    stub = _mock_critic(outcome=Outcome.ERROR, dimensions=[error_dim])
    node = LegacyFlashCriticEvalNode(stub)
    verdict = node.evaluate(_make_ctx(tmp_path))
    # Guard against an "anything that is not FAIL passes" mapping.
    assert verdict.score == 0.0


# ── Reasoning + dimensions ───────────────────────────────────────────


def test_dimensions_joined_into_reasoning(tmp_path) -> None:
    """Every dimension message shows up in the reasoning, with a " | " separator."""
    # (name, severity, passed, message) for each dimension the critic reports.
    specs = [
        ("identity", Severity.HARD, False, "face does not match hero ref"),
        ("wardrobe", Severity.HARD, False, "jacket color drifted"),
        ("background", Severity.SOFT, True, "background OK"),
    ]
    dims = [
        Dimension(name=name, severity=severity, passed=passed, message=message)
        for name, severity, passed, message in specs
    ]
    node = LegacyFlashCriticEvalNode(
        _mock_critic(outcome=Outcome.FAIL, dimensions=dims)
    )
    verdict = node.evaluate(_make_ctx(tmp_path))
    # Passing and failing dimensions alike must be reflected in the text.
    for fragment in (
        "face does not match hero ref",
        "jacket color drifted",
        "background OK",
    ):
        assert fragment in verdict.reasoning
    assert " | " in verdict.reasoning


def test_metadata_captures_outcome_and_dimension_names(tmp_path) -> None:
    """Result metadata records the outcome value and the ordered dimension names."""
    identity_dim = Dimension(
        name="identity",
        severity=Severity.HARD,
        passed=False,
        message="x",
    )
    wardrobe_dim = Dimension(
        name="wardrobe",
        severity=Severity.HARD,
        passed=False,
        message="y",
    )
    stub = _mock_critic(
        outcome=Outcome.FAIL,
        dimensions=[identity_dim, wardrobe_dim],
    )
    verdict = LegacyFlashCriticEvalNode(stub).evaluate(_make_ctx(tmp_path))
    recorded = verdict.metadata
    assert recorded["outcome"] == "fail"
    assert recorded["dimensions"] == ["identity", "wardrobe"]


# ── EvalResult shape + cost ──────────────────────────────────────────


def test_eval_result_shape_complete(tmp_path) -> None:
    """Check every EvalResult field the adapter is expected to populate."""
    only_dim = Dimension(
        name="identity", severity=Severity.HARD, passed=True, message="ok"
    )
    stub = _mock_critic(outcome=Outcome.PASS, dimensions=[only_dim])
    node = LegacyFlashCriticEvalNode(stub, judge_id="custom_judge")
    verdict = node.evaluate(_make_ctx(tmp_path))

    assert verdict.score == 1.0
    assert isinstance(verdict.reasoning, str)
    assert verdict.judge_id == "custom_judge"
    assert verdict.model_used == "gemini-2.0-flash"
    # The adapter reports a flat 0.0 cost for wrapped legacy critics.
    assert verdict.cost_usd == 0.0
    assert isinstance(verdict.metadata, dict)
    for required_key in ("outcome", "dimensions"):
        assert required_key in verdict.metadata


def test_cost_usd_always_zero_even_when_critic_has_cost_field(tmp_path) -> None:
    """A non-zero ``CriticResult.cost`` must not leak into ``cost_usd``."""
    costly_result = CriticResult(
        critic_name="mock",
        outcome=Outcome.PASS,
        dimensions=[
            Dimension(name="x", severity=Severity.HARD, passed=True, message="ok")
        ],
        cost=0.42,  # Deliberately non-zero; the adapter is expected to drop it.
    )
    stub = Mock()
    stub.model_id = "gemini-2.0-flash"
    stub.run.return_value = ("/tmp/frame.png", costly_result)

    verdict = LegacyFlashCriticEvalNode(stub).evaluate(_make_ctx(tmp_path))
    assert verdict.cost_usd == 0.0


# ── EvalNode Protocol conformance ────────────────────────────────────


def test_adapter_satisfies_eval_node_protocol(tmp_path) -> None:
    """The adapter passes an ``isinstance`` check against the EvalNode Protocol.

    Also verifies the individual members: a non-empty string ``judge_id``,
    a string ``model_used``, and a callable ``evaluate``.
    """
    stub = _mock_critic(outcome=Outcome.PASS, dimensions=[])
    node = LegacyFlashCriticEvalNode(stub)
    assert isinstance(node, EvalNode)
    # Member-level checks.
    assert isinstance(node.judge_id, str)
    assert node.judge_id
    assert isinstance(node.model_used, str)
    assert callable(node.evaluate)


def test_default_judge_id_is_legacy_flash_critic_v1(tmp_path) -> None:
    """Without an explicit judge_id the adapter uses "legacy_flash_critic_v1"."""
    node = LegacyFlashCriticEvalNode(
        _mock_critic(outcome=Outcome.PASS, dimensions=[])
    )
    assert node.judge_id == "legacy_flash_critic_v1"


def test_custom_judge_id_respected(tmp_path) -> None:
    """A judge_id passed to the constructor is exposed unchanged."""
    node = LegacyFlashCriticEvalNode(
        _mock_critic(outcome=Outcome.PASS, dimensions=[]),
        judge_id="frame_critic_v2",
    )
    assert node.judge_id == "frame_critic_v2"


# ── model_used resolution ────────────────────────────────────────────


def test_model_used_echoed_from_critic_model_id(tmp_path) -> None:
    """``model_used`` on the result mirrors the wrapped critic's ``model_id``."""
    stub = _mock_critic(
        model_id="gemini-3.1-flash-image-preview",
        outcome=Outcome.PASS,
        dimensions=[],
    )
    verdict = LegacyFlashCriticEvalNode(stub).evaluate(_make_ctx(tmp_path))
    assert verdict.model_used == "gemini-3.1-flash-image-preview"


def test_model_used_falls_back_to_unknown_when_critic_has_no_model_id(tmp_path) -> None:
    """With no ``model_id`` on the critic, ``model_used`` is reported as "unknown"."""
    # model_id=None makes the helper strip the attribute from the mock.
    stub = _mock_critic(model_id=None, outcome=Outcome.PASS, dimensions=[])
    verdict = LegacyFlashCriticEvalNode(stub).evaluate(_make_ctx(tmp_path))
    assert verdict.model_used == "unknown"


# ── critic.run invocation contract ───────────────────────────────────


def test_adapter_calls_critic_run_with_string_path_and_merged_context_dict(tmp_path) -> None:
    """Pin the keyword arguments the adapter hands to ``critic.run``.

    Per § 12f item 3 the legacy critic takes a ``str`` artifact path and a
    ``dict`` context. The context is expected to carry the EvalContext
    metadata plus the typed fields (prompt / rubric / judge_id). Metadata
    keys that share a name with a typed field are excluded from the
    passthrough check, since the explicit field is the one that should win.
    """
    stub = _mock_critic(outcome=Outcome.PASS, dimensions=[])
    ctx = _make_ctx(tmp_path)
    LegacyFlashCriticEvalNode(stub).evaluate(ctx)

    stub.run.assert_called_once()
    passed_kwargs = stub.run.call_args.kwargs

    # Artifact must be the stringified path, never a Path object.
    assert passed_kwargs["artifact"] == str(ctx.target_artifact_path)
    assert isinstance(passed_kwargs["artifact"], str)

    # Context must be a plain dict merging metadata with typed fields.
    assert isinstance(passed_kwargs["context"], dict)
    forwarded = passed_kwargs["context"]

    # Non-colliding metadata entries are passed through untouched.
    typed_field_names = {"prompt", "rubric", "judge_id", "target_take"}
    passthrough = {
        key: value
        for key, value in (ctx.metadata or {}).items()
        if key not in typed_field_names
    }
    for key, expected in passthrough.items():
        assert forwarded[key] == expected

    # Typed fields are exposed under their own names.
    for field_name in ("prompt", "rubric", "judge_id"):
        assert forwarded[field_name] == getattr(ctx, field_name)
