"""CP-9 Phase 4 — register_default_eval_runners + register_eval_runners tests.

Verifies opt-in registration semantics, force-kwarg behavior, the runners/__init__
re-export alias, and that auto-registration does NOT happen on plain dispatch
of an eval modality.
"""

import sys
import pathlib

sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent.parent.parent))
from recoil.core.paths import ensure_pipeline_importable  # noqa: E402

ensure_pipeline_importable()

import pytest  # noqa: E402

from recoil.pipeline.core.dispatch import (  # noqa: E402
    register_default_eval_runners,
    _reset_bootstrap_for_tests,
)
from recoil.pipeline.core.registry import (  # noqa: E402
    MODALITY_EVAL_AUDIO_V1,
    MODALITY_EVAL_IMAGE_V1,
    MODALITY_EVAL_VIDEO_V1,
    get_runner,
    is_registered,
    _reset_for_tests,
)
from recoil.pipeline.core.runners.eval_image_runner import EvalImageRunner  # noqa: E402
from recoil.pipeline.core.runners.eval_video_runner import EvalVideoRunner  # noqa: E402
from recoil.pipeline.core.runners.eval_audio_runner import EvalAudioRunner  # noqa: E402


@pytest.fixture(autouse=True)
def reset_registries():
    """Reset the runner registry and the dispatch bootstrap state around each test.

    Both reset helpers are invoked once before the test body runs and once
    more after it finishes.
    """
    def _wipe():
        _reset_for_tests()
        _reset_bootstrap_for_tests()

    _wipe()
    yield
    _wipe()


def test_register_default_eval_runners_registers_three_modalities():
    """All three eval modalities go from unregistered to registered, each
    backed by an instance of its matching runner class."""
    expected = (
        (MODALITY_EVAL_IMAGE_V1, EvalImageRunner),
        (MODALITY_EVAL_VIDEO_V1, EvalVideoRunner),
        (MODALITY_EVAL_AUDIO_V1, EvalAudioRunner),
    )

    # Precondition: nothing is registered before the opt-in call.
    for modality, _ in expected:
        assert not is_registered(modality)

    register_default_eval_runners()

    for modality, _ in expected:
        assert is_registered(modality)
    for modality, runner_cls in expected:
        assert isinstance(get_runner(modality), runner_cls)


def test_register_default_eval_runners_idempotent_same_instances():
    """Registering a second time with ``force=True`` must not raise, and an
    EvalImageRunner is registered for the image modality before and after.

    NOTE(review): the spec wording "Re-registration with the same instance is
    idempotent" applies when the caller holds a reference to the registered
    instance. The original author notes that a second *non-forced* call would
    create new instances and collide; that path is not exercised here.
    """
    # Fresh registration.
    register_default_eval_runners()
    before = get_runner(MODALITY_EVAL_IMAGE_V1)

    # Forced re-registration on top of the existing entries.
    register_default_eval_runners(force=True)
    after = get_runner(MODALITY_EVAL_IMAGE_V1)

    for runner in (before, after):
        assert isinstance(runner, EvalImageRunner)


def test_register_default_eval_runners_force_overrides():
    """A forced re-registration swaps in a different EvalImageRunner object."""
    register_default_eval_runners()
    original_runner = get_runner(MODALITY_EVAL_IMAGE_V1)

    register_default_eval_runners(force=True)
    replacement = get_runner(MODALITY_EVAL_IMAGE_V1)

    # The registry must now hold a distinct object of the same runner class.
    assert original_runner is not replacement
    assert isinstance(replacement, EvalImageRunner)


def test_register_default_eval_runners_NOT_called_by_dispatch_default():
    """Dispatching an eval modality with no prior opt-in registration raises
    a KeyError whose message mentions the modality name.

    Rationale recorded by the original author (Audit § 12c R3): eval runners
    require GEMINI_API_KEY at the EvalNode layer, so they are NOT part of the
    auto-bootstrapped set.
    """
    from recoil.pipeline.core.dispatch import dispatch
    from recoil.pipeline.core.dispatch_context import DispatchContext

    class _SR:
        _dispatch_path = "x"

        def execute_keyframe(self, **kwargs):  # required for bootstrap
            raise NotImplementedError

        def execute_video(self, **kwargs):
            raise NotImplementedError

    payload = {
        "shot_id": "S",
        "artifact_path": "/tmp/x.png",
        "rubric": "r",
    }
    ctx = DispatchContext(
        caller_id="test",
        step_runner=_SR(),
        receipts_log_path="DISABLED",
    )

    with pytest.raises(KeyError, match="eval_image_v1"):
        dispatch("eval_image_v1", payload, context=ctx)


def test_register_default_eval_runners_modality_strings_match_constants():
    """Looking runners up by literal modality string yields runners whose
    ``modality`` attribute equals the corresponding registry constant."""
    register_default_eval_runners()

    constant_by_literal = {
        "eval_image_v1": MODALITY_EVAL_IMAGE_V1,
        "eval_video_v1": MODALITY_EVAL_VIDEO_V1,
        "eval_audio_v1": MODALITY_EVAL_AUDIO_V1,
    }
    for literal, constant in constant_by_literal.items():
        assert get_runner(literal).modality == constant


def test_runners_init_register_eval_runners_alias():
    """``register_eval_runners`` imported from ``recoil.pipeline.core.runners``
    registers the image, video and audio eval modalities."""
    from recoil.pipeline.core.runners import register_eval_runners

    # Precondition: the image modality starts out unregistered.
    assert not is_registered(MODALITY_EVAL_IMAGE_V1)

    register_eval_runners()

    for modality in (
        MODALITY_EVAL_IMAGE_V1,
        MODALITY_EVAL_VIDEO_V1,
        MODALITY_EVAL_AUDIO_V1,
    ):
        assert is_registered(modality)


def test_runners_init_imports_runner_classes():
    """The runners package re-exports the three runner classes (the very same
    objects as the per-module definitions) plus a callable
    GeminiVisionEvalNode."""
    from recoil.pipeline.core.runners import (
        EvalImageRunner as reexported_image,
        EvalVideoRunner as reexported_video,
        EvalAudioRunner as reexported_audio,
        GeminiVisionEvalNode as reexported_node,
    )

    identity_pairs = (
        (reexported_image, EvalImageRunner),
        (reexported_video, EvalVideoRunner),
        (reexported_audio, EvalAudioRunner),
    )
    for reexported, canonical in identity_pairs:
        assert reexported is canonical
    assert callable(reexported_node)


def test_dispatch_eval_image_v1_after_registration_works(tmp_path):
    """End-to-end: register eval runners + an EvalNode + dispatch image eval.

    The eval-node registry is reset before the scenario and again in a
    ``finally`` clause, so a failing assertion (or an exception raised by
    ``dispatch``) cannot leave the stub judge registered for later tests.
    """
    from recoil.pipeline.core.dispatch import dispatch
    from recoil.pipeline.core.dispatch_context import DispatchContext
    from recoil.pipeline.core.eval import (
        EvalContext, EvalResult, register_eval_node,
        _reset_eval_registry_for_tests,
    )

    class _Judge:
        # Stub eval node: returns a fixed result and ignores its context.
        judge_id = "eval_image_v1"
        model_used = "gemini-3.1-pro-preview"
        def evaluate(self, ctx: EvalContext) -> EvalResult:
            return EvalResult(
                score=0.91, reasoning="ok", judge_id=self.judge_id,
                model_used=self.model_used, cost_usd=0.001,
            )

    class _SR:
        # Stub step runner; neither method is expected to be invoked here.
        _dispatch_path = "test"
        def execute_keyframe(self, **kw):
            raise NotImplementedError
        def execute_video(self, **kw):
            raise NotImplementedError

    _reset_eval_registry_for_tests()
    try:
        register_default_eval_runners()
        register_eval_node("eval_image_v1", _Judge())

        ctx = DispatchContext(
            caller_id="test", step_runner=_SR(),
            receipts_log_path="DISABLED",
        )
        # The artifact file is never created in this test; only its path is passed.
        receipt = dispatch("eval_image_v1", {
            "shot_id": "S", "artifact_path": str(tmp_path / "fake.png"),
            "rubric": "Score 0-1.",
        }, context=ctx)
        assert receipt.modality == "eval_image_v1"
        assert receipt.run_result.success is True
        md = receipt.run_result.metadata
        assert md["eval_score"] == 0.91
        assert md["eval_reasoning"] == "ok"
        assert md["judge_id"] == "eval_image_v1"
    finally:
        # Always drop the stub judge, even when an assertion above fails.
        _reset_eval_registry_for_tests()


def test_dispatch_eval_video_v1_after_registration_works(tmp_path):
    """End-to-end: register eval runners + an EvalNode + dispatch video eval.

    The eval-node registry is reset before the scenario and again in a
    ``finally`` clause, so a failing assertion (or an exception raised by
    ``dispatch``) cannot leave the stub judge registered for later tests.
    """
    from recoil.pipeline.core.dispatch import dispatch
    from recoil.pipeline.core.dispatch_context import DispatchContext
    from recoil.pipeline.core.eval import (
        EvalContext, EvalResult, register_eval_node,
        _reset_eval_registry_for_tests,
    )

    class _Judge:
        # Stub eval node: returns a fixed result and ignores its context.
        judge_id = "eval_video_v1"
        model_used = "gemini-3.1-pro-preview"
        def evaluate(self, ctx: EvalContext) -> EvalResult:
            return EvalResult(
                score=0.66, reasoning="motion ok", judge_id=self.judge_id,
                model_used=self.model_used, cost_usd=0.002,
            )

    class _SR:
        # Stub step runner; neither method is expected to be invoked here.
        _dispatch_path = "test"
        def execute_keyframe(self, **kw):
            raise NotImplementedError
        def execute_video(self, **kw):
            raise NotImplementedError

    _reset_eval_registry_for_tests()
    try:
        register_default_eval_runners()
        register_eval_node("eval_video_v1", _Judge())

        ctx = DispatchContext(
            caller_id="test", step_runner=_SR(),
            receipts_log_path="DISABLED",
        )
        # The artifact file is never created in this test; only its path is passed.
        receipt = dispatch("eval_video_v1", {
            "shot_id": "S", "artifact_path": str(tmp_path / "v.mp4"),
            "rubric": "Score 0-1.",
        }, context=ctx)
        assert receipt.run_result.success is True
        assert receipt.run_result.metadata["eval_score"] == 0.66
    finally:
        # Always drop the stub judge, even when an assertion above fails.
        _reset_eval_registry_for_tests()


def test_dispatch_eval_audio_v1_after_registration_works(tmp_path):
    """End-to-end: register eval runners + an EvalNode + dispatch audio eval.

    The eval-node registry is reset before the scenario and again in a
    ``finally`` clause, so a failing assertion (or an exception raised by
    ``dispatch``) cannot leave the stub judge registered for later tests.
    """
    from recoil.pipeline.core.dispatch import dispatch
    from recoil.pipeline.core.dispatch_context import DispatchContext
    from recoil.pipeline.core.eval import (
        EvalContext, EvalResult, register_eval_node,
        _reset_eval_registry_for_tests,
    )

    class _Judge:
        # Stub eval node: returns a fixed result and ignores its context.
        judge_id = "eval_audio_v1"
        model_used = "gemini-3.1-pro-preview"
        def evaluate(self, ctx: EvalContext) -> EvalResult:
            return EvalResult(
                score=0.4, reasoning="ok-ish", judge_id=self.judge_id,
                model_used=self.model_used, cost_usd=0.0005,
            )

    class _SR:
        # Stub step runner; neither method is expected to be invoked here.
        _dispatch_path = "test"
        def execute_keyframe(self, **kw):
            raise NotImplementedError
        def execute_video(self, **kw):
            raise NotImplementedError

    _reset_eval_registry_for_tests()
    try:
        register_default_eval_runners()
        register_eval_node("eval_audio_v1", _Judge())

        ctx = DispatchContext(
            caller_id="test", step_runner=_SR(),
            receipts_log_path="DISABLED",
        )
        # The artifact file is never created in this test; only its path is passed.
        receipt = dispatch("eval_audio_v1", {
            "shot_id": "S", "artifact_path": str(tmp_path / "a.mp3"),
            "rubric": "Score 0-1.",
        }, context=ctx)
        assert receipt.run_result.success is True
        assert receipt.run_result.metadata["eval_score"] == 0.4
    finally:
        # Always drop the stub judge, even when an assertion above fails.
        _reset_eval_registry_for_tests()