"""CP-9 Phase 7 — single-judge PanelOfJudges using LegacyFlashCriticEvalNode.

Verifies that the legacy-critic adapter slots into a Phase 3 PanelOfJudges
exactly like a Gemini Vision judge:

  - PanelOfJudges accepts a LegacyFlashCriticEvalNode as a judge.
  - Single-judge panel produces a scorecard with the expected shape:
    panel_id, panel_score, panel_warnings, judges, aggregation,
    panel_cost_usd.
  - panel_score equals the legacy adapter's score (1.0 / 0.0).
  - panel_warnings is empty for a single judge (no outliers possible).
  - judges list has exactly 1 entry containing the adapter's EvalResult.

The wrapped CriticLoop is a unittest.mock.Mock — never invokes a real
Gemini Flash critic.
"""

import sys
import pathlib
from unittest.mock import Mock

sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent.parent.parent))
from recoil.core.paths import ensure_pipeline_importable  # noqa: E402

ensure_pipeline_importable()

from recoil.pipeline.core.eval import (  # noqa: E402
    EvalContext,
    PanelOfJudges,
)
from recoil.core.critic import (  # noqa: E402
    LegacyFlashCriticEvalNode,
    CriticResult,
    Dimension,
    Outcome,
    Severity,
)


def _ctx(tmp_path: pathlib.Path) -> EvalContext:
    """Build an EvalContext pointing at a stub ``frame.png`` under *tmp_path*.

    The artifact file is created on disk with a six-byte stub (the leading
    bytes of a PNG signature) so the context's artifact path refers to a
    file that actually exists.
    """
    frame_path = tmp_path / "frame.png"
    frame_path.write_bytes(b"\x89PNG\r\n")

    context_fields = {
        "target_artifact_path": frame_path,
        "target_take": None,
        "prompt": "hero stands in alley",
        "rubric": "identity locked",
        "judge_id": "caller",
    }
    return EvalContext(**context_fields)


def _mock_critic(outcome: Outcome, dim_message: str = "ok") -> Mock:
    """Return a Mock critic whose ``run`` yields a canned CriticResult.

    Args:
        outcome: Outcome stored on the CriticResult; the single "identity"
            dimension is marked passed only when this is ``Outcome.PASS``.
        dim_message: Message attached to the "identity" dimension.

    Returns:
        A Mock with ``model_id`` set and ``run`` configured to return a
        ``(path, CriticResult)`` tuple.
    """
    identity_dimension = Dimension(
        name="identity",
        severity=Severity.HARD,
        passed=(outcome == Outcome.PASS),
        message=dim_message,
    )
    canned_result = CriticResult(
        critic_name="mock",
        outcome=outcome,
        dimensions=[identity_dimension],
    )

    stub = Mock()
    stub.model_id = "gemini-2.0-flash"
    stub.run.return_value = ("/tmp/frame.png", canned_result)
    return stub


def test_panel_accepts_legacy_adapter_as_judge(tmp_path) -> None:
    """A panel can be built around one legacy adapter and exposes it as-is."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))

    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    assert judge_panel.panel_id == "legacy_only"
    registered = judge_panel.judges
    assert len(registered) == 1
    # Identity check: the panel must hold the very same adapter object.
    assert judge_panel.judges[0] is legacy_judge


def test_single_judge_panel_pass_outcome_yields_score_1_0(tmp_path) -> None:
    """A PASS outcome from the sole judge gives a panel_score of 1.0."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    # The receipt is an opaque placeholder; the panel does not read it.
    placeholder_receipt = Mock()
    result = judge_panel.score(placeholder_receipt, _ctx(tmp_path))

    assert result["panel_score"] == 1.0


def test_single_judge_panel_fail_outcome_yields_score_0_0(tmp_path) -> None:
    """A FAIL outcome from the sole judge gives a panel_score of 0.0."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.FAIL))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    result = judge_panel.score(Mock(), _ctx(tmp_path))

    assert result["panel_score"] == 0.0


def test_scorecard_has_expected_shape(tmp_path) -> None:
    """The scorecard carries at least the six documented top-level keys."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    result = judge_panel.score(Mock(), _ctx(tmp_path))

    required = {
        "panel_id",
        "panel_score",
        "panel_warnings",
        "judges",
        "aggregation",
        "panel_cost_usd",
    }
    # Extra keys are tolerated; only the required ones must be present.
    assert required <= set(result.keys())


def test_panel_warnings_empty_for_single_judge(tmp_path) -> None:
    """With a single judge there is nothing to flag, so warnings are empty."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    result = judge_panel.score(Mock(), _ctx(tmp_path))

    assert result["panel_warnings"] == []


def test_judges_list_contains_one_entry(tmp_path) -> None:
    """The scorecard lists exactly one judge entry with the adapter's fields."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    result = judge_panel.score(Mock(), _ctx(tmp_path))

    assert len(result["judges"]) == 1
    entry = result["judges"][0]
    # Identifier, score and model as reported for the mocked PASS critic.
    assert entry["judge_id"] == "legacy_flash_critic_v1"
    assert entry["score"] == 1.0
    assert entry["model_used"] == "gemini-2.0-flash"


def test_aggregation_default_median_preserved(tmp_path) -> None:
    """A one-judge panel built without an explicit aggregation reports 'median'."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    result = judge_panel.score(Mock(), _ctx(tmp_path))

    assert result["aggregation"] == "median"


def test_panel_cost_usd_is_zero_for_legacy_only_panel(tmp_path) -> None:
    """A panel made only of the legacy adapter reports panel_cost_usd of 0.0."""
    legacy_judge = LegacyFlashCriticEvalNode(_mock_critic(Outcome.PASS))
    judge_panel = PanelOfJudges(panel_id="legacy_only", judges=[legacy_judge])

    result = judge_panel.score(Mock(), _ctx(tmp_path))

    assert result["panel_cost_usd"] == 0.0
