"""CP-9 Phase 5 — Beat.select_primary("score") strategy coverage.

Replaces the CP-7 NotImplementedError stub. Covers:
  - highest aggregate_score wins
  - tie-break by take_index ASC
  - score-less takes sort BELOW scored takes (effectively ineligible)
  - all-None returns None and does NOT raise
  - other strategies (manual / first_success / unknown) preserved byte-stable
  - lazy compute on takes whose aggregate_score is None
  - pre-set aggregate_score values are honored without recomputation

Tests fabricate Workflow / WorkflowStep / GenerationReceipt — no live API,
no Phase 4 runner invocation.
"""

import sys
import pathlib

sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent.parent.parent))
from recoil.core.paths import ensure_pipeline_importable  # noqa: E402
ensure_pipeline_importable()

import pytest  # noqa: E402

from recoil.pipeline.core.receipts import GenerationReceipt  # noqa: E402
from recoil.pipeline.core.registry import RunResult  # noqa: E402
from recoil.pipeline.core.take import Beat, Take  # noqa: E402
from recoil.pipeline.core.workflow import Workflow, WorkflowStep  # noqa: E402


# ─────────────────────────────────────────────────────────────────────────
# Fixture helpers
# ─────────────────────────────────────────────────────────────────────────


def _make_receipt(*, modality="image_t2i", shot_id="X",
                  success=True, eval_scores=None) -> GenerationReceipt:
    """Fabricate a GenerationReceipt wrapping a synthetic RunResult.

    Args:
        modality: Modality string stamped on both the RunResult and receipt.
        shot_id: Shot identifier; also embedded in the generated ids.
        success: When True the RunResult carries an output path and no
            error; when False it carries no output path and error "boom".
        eval_scores: Optional mapping copied onto the receipt. A falsy
            value results in an empty dict.

    Returns:
        A GenerationReceipt with fixed caller/project/episode/timestamp
        values and the given eval scores.
    """
    # Success and failure differ only in these two RunResult fields.
    if success:
        out_path, err = "/tmp/x.png", None
    else:
        out_path, err = None, "boom"

    run_result = RunResult(
        id=f"{shot_id}_{modality}_1700000000",
        modality=modality,
        output_path=out_path,
        output_url=None,
        metadata={"cost_usd": 0.04},
        success=success,
        error=err,
    )

    # Always hand the receipt its own dict so callers' mappings aren't shared.
    scores_copy = dict(eval_scores) if eval_scores else {}

    return GenerationReceipt(
        receipt_id=f"rcpt_1_{shot_id}_{modality}",
        modality=modality,
        caller_id="test",
        project="tartarus",
        episode=1,
        shot_id=shot_id,
        timestamp_utc="2026-04-28T03:14:15Z",
        run_result=run_result,
        provenance={},
        eval_scores=scores_copy,
    )


def _make_step(*, with_eval=None, status="succeeded") -> WorkflowStep:
    """Build a single "kf" image_t2i WorkflowStep with a fabricated receipt.

    Args:
        with_eval: Panel score to record under the "panel_a" entry of the
            receipt's eval_scores. None leaves eval_scores empty.
        status: Value assigned to the step's ``status`` attribute.

    Returns:
        The WorkflowStep with ``receipt`` and ``status`` set.
    """
    panel_scores = {}
    if with_eval is not None:
        panel_scores["panel_a"] = {"panel_score": with_eval, "judges": []}

    built = WorkflowStep(
        step_id="kf",
        modality="image_t2i",
        payload={"shot_id": "X", "prompt": "p", "model": "nbp"},
    )
    built.receipt = _make_receipt(eval_scores=panel_scores)
    built.status = status
    return built


def _make_workflow(*, with_eval=None, status="succeeded",
                   workflow_id="wf") -> Workflow:
    """Build a one-step Workflow around a step from ``_make_step``.

    Args:
        with_eval: Forwarded to ``_make_step`` (panel score or None).
        status: Forwarded to ``_make_step`` as the step status.
        workflow_id: Identifier given to the Workflow.

    Returns:
        A Workflow containing exactly one step.
    """
    only_step = _make_step(with_eval=with_eval, status=status)
    return Workflow(workflow_id=workflow_id, steps=[only_step])


def _beat_with_scored_takes(beat_id, scores_and_statuses):
    """Build a Beat holding one Take per ``(panel_score, take_status)`` pair.

    Each take is created through ``Beat.new_take`` (so its id is assigned by
    the Beat) around a one-step workflow named ``wf_<position>``. The
    workflow step itself is always built with status "succeeded"; only the
    Take's ``status`` is set from the pair. A ``panel_score`` of None yields
    a workflow whose receipt has no eval scores.
    """
    beat = Beat(beat_id=beat_id)
    for position, pair in enumerate(scores_and_statuses):
        panel_score, take_status = pair
        workflow = _make_workflow(
            with_eval=panel_score,
            status="succeeded",
            workflow_id=f"wf_{position}",
        )
        take = beat.new_take(workflow=workflow)
        take.status = take_status
    return beat


# ─────────────────────────────────────────────────────────────────────────
# Score strategy — happy-path cases
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_single_succeeded_take_with_score():
    """A lone scored take is returned and recorded as the primary."""
    beat = _beat_with_scored_takes("b1", [(0.7, "succeeded")])
    expected_id = "b1_take_0"
    picked = beat.select_primary(strategy="score")
    assert picked == expected_id
    assert beat.primary_take_id == expected_id
    # primary_take must resolve to the very same Take object.
    assert beat.primary_take is beat.takes[0]


def test_select_primary_score_picks_highest():
    """With scores 0.4 / 0.9 / 0.6, the 0.9 take (index 1) is selected."""
    entries = [(0.4, "succeeded"), (0.9, "succeeded"), (0.6, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_1"
    assert beat.primary_take_id == "b1_take_1"


def test_select_primary_score_tie_break_take_index_ASC():
    """Two takes tied at 0.8 (indices 0 and 2): the lower index is chosen."""
    entries = [(0.8, "succeeded"), (0.5, "succeeded"), (0.8, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_0"


def test_select_primary_score_tie_break_take_index_three_way():
    """All three takes score 0.6; the take at index 0 is chosen."""
    entries = [(0.6, "succeeded") for _ in range(3)]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_0"


# ─────────────────────────────────────────────────────────────────────────
# No-eval / all-unscored / mixed paths
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_no_takes_returns_None_no_exception():
    """A Beat with no takes yields None and keeps primary_take_id at None."""
    empty_beat = Beat(beat_id="b1")
    outcome = empty_beat.select_primary(strategy="score")
    assert outcome is None
    assert empty_beat.primary_take_id is None


def test_select_primary_score_takes_without_eval_scores_None_returned():
    """Two takes whose receipts carry no eval scores: the call is expected
    to return None and leave primary_take_id unset."""
    entries = [(None, "succeeded"), (None, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    outcome = beat.select_primary(strategy="score")
    assert outcome is None
    assert beat.primary_take_id is None


def test_select_primary_score_mixed_scored_unscored():
    """One scored take sandwiched between two unscored takes: the scored
    take (index 1) is the one selected."""
    entries = [
        (None, "succeeded"),  # no eval data
        (0.5, "succeeded"),   # the only scored take
        (None, "succeeded"),  # no eval data
    ]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_1"


def test_select_primary_score_score_less_takes_below_scored_takes():
    """A take scored at only 0.1 still wins over a take with no eval data."""
    entries = [(None, "succeeded"), (0.1, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    # A low but present score must rank above an absent one.
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_1"


def test_select_primary_score_negative_score_still_beats_unscored():
    """A take with a pre-set negative aggregate_score is expected to be
    chosen over a take that has no score at all."""
    beat = Beat(beat_id="b1")

    # Index 0: workflow without eval data, aggregate_score left untouched.
    unscored = beat.new_take(
        workflow=_make_workflow(with_eval=None, workflow_id="wf0"),
    )
    unscored.status = "succeeded"

    # Index 1: also no eval data, but aggregate_score assigned directly.
    negative = beat.new_take(
        workflow=_make_workflow(with_eval=None, workflow_id="wf1"),
    )
    negative.status = "succeeded"
    negative.aggregate_score = -0.2

    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_1"


# ─────────────────────────────────────────────────────────────────────────
# Lazy compute / pre-set honor
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_aggregate_score_computed_lazily():
    """aggregate_score starts as None and is expected to be populated
    (to ~0.55 here) as a side effect of select_primary("score")."""
    beat = _beat_with_scored_takes("b1", [(0.55, "succeeded")])
    only_take = beat.takes[0]
    # Before selection nothing has been computed yet.
    assert only_take.aggregate_score is None
    beat.select_primary(strategy="score")
    # After selection the score has been filled in.
    assert beat.takes[0].aggregate_score == pytest.approx(0.55)


def test_select_primary_score_aggregate_score_already_set_not_recomputed():
    """A pre-assigned aggregate_score is expected to be used as-is rather
    than being replaced by a value derived from the workflow's eval scores.

    Take 0 has eval data worth 0.9 but a manual aggregate_score of 0.1;
    take 1 has eval data worth 0.2 and no pre-set value. If take 0 were
    recomputed it would win, so take 1 winning shows the override held.
    """
    beat = Beat(beat_id="b1")

    overridden = beat.new_take(
        workflow=_make_workflow(with_eval=0.9, workflow_id="wf0"),
    )
    overridden.status = "succeeded"
    overridden.aggregate_score = 0.1  # manual override by the caller

    rival = beat.new_take(
        workflow=_make_workflow(with_eval=0.2, workflow_id="wf1"),
    )
    rival.status = "succeeded"

    winner = beat.select_primary(strategy="score")

    # 0.1 (kept) < 0.2 (computed), so the rival at index 1 is picked.
    assert winner == "b1_take_1"
    first_score = beat.takes[0].aggregate_score
    second_score = beat.takes[1].aggregate_score
    assert first_score == pytest.approx(0.1)  # override untouched
    assert second_score == pytest.approx(0.2)


# ─────────────────────────────────────────────────────────────────────────
# State mutations / return value
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_sets_primary_take_id_to_chosen():
    """primary_take_id goes from None to the id of the highest-scored take."""
    entries = [(0.3, "succeeded"), (0.8, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    assert beat.primary_take_id is None
    beat.select_primary(strategy="score")
    assert beat.primary_take_id == "b1_take_1"


def test_select_primary_score_returns_chosen_take_id():
    """The return value is the chosen take's id, as a plain string."""
    entries = [(0.3, "succeeded"), (0.8, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    returned = beat.select_primary(strategy="score")
    assert isinstance(returned, str)
    assert returned == "b1_take_1"


def test_select_primary_score_no_scored_takes_does_not_clobber_primary():
    """When nothing is scored the call returns None and a primary_take_id
    set earlier by the caller is expected to stay in place."""
    beat = _beat_with_scored_takes("b1", [(None, "succeeded")])
    previous_choice = "b1_take_0"
    beat.primary_take_id = previous_choice  # simulate an earlier selection
    outcome = beat.select_primary(strategy="score")
    assert outcome is None
    assert beat.primary_take_id == "b1_take_0"


# ─────────────────────────────────────────────────────────────────────────
# Status independence — score-based selection ignores Take.status
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_failed_takes_with_scores_still_eligible():
    """Take.status is not a filter for the score strategy: a "failed" take
    holding the top score (0.9) is expected to beat a "succeeded" 0.4."""
    entries = [
        (0.4, "succeeded"),
        (0.9, "failed"),  # highest score despite the failed status
    ]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_1"


def test_select_primary_score_partial_takes_with_scores_eligible():
    """A "partial" take scoring 0.7 is expected to beat a "succeeded" 0.3."""
    entries = [(0.3, "succeeded"), (0.7, "partial")]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_1"


# ─────────────────────────────────────────────────────────────────────────
# Other strategies preserved (no regression on CP-7 byte-stable branches)
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_does_not_affect_first_success_strategy():
    """Running "score" first does not change what "first_success" picks.

    Score selects the failed 0.9 take (index 0); after clearing the
    primary, first_success selects the succeeded take (index 1).
    """
    entries = [
        (0.9, "failed"),     # top score, but failed
        (0.2, "succeeded"),  # low score, but succeeded
    ]
    beat = _beat_with_scored_takes("b1", entries)

    by_score = beat.select_primary(strategy="score")
    assert by_score == "b1_take_0"

    # Clear the pick so first_success starts from a blank primary.
    beat.primary_take_id = None
    by_first_success = beat.select_primary(strategy="first_success")
    assert by_first_success == "b1_take_1"


def test_select_primary_score_does_not_affect_manual_strategy():
    """"manual" hands back the caller's existing primary_take_id and leaves
    it untouched, even when a higher-scored take exists."""
    entries = [(0.9, "succeeded"), (0.2, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    beat.primary_take_id = "b1_take_1"  # deliberately the lower-scored take
    returned = beat.select_primary(strategy="manual")
    assert returned == "b1_take_1"
    # The stored primary is the same as before the call.
    assert beat.primary_take_id == "b1_take_1"


def test_select_primary_unknown_strategy_still_raises_ValueError():
    """An unrecognized strategy name is expected to raise ValueError with
    the "Unknown primary-selection strategy" message."""
    beat = _beat_with_scored_takes("b1", [(0.5, "succeeded")])
    expected_message = "Unknown primary-selection strategy"
    with pytest.raises(ValueError, match=expected_message):
        beat.select_primary(strategy="bogus")  # type: ignore


# ─────────────────────────────────────────────────────────────────────────
# Edge cases
# ─────────────────────────────────────────────────────────────────────────


def test_select_primary_score_no_steps_with_receipts_returns_None():
    """Two "pending" takes whose workflow steps were built without any
    receipt being attached: the score strategy is expected to return None."""
    beat = Beat(beat_id="b1")
    for n in range(2):
        # Bare step: constructed directly, no receipt assigned afterwards.
        bare_step = WorkflowStep(
            step_id="kf",
            modality="image_t2i",
            payload={"shot_id": "X", "prompt": "p", "model": "nbp"},
        )
        workflow = Workflow(workflow_id=f"wf_{n}", steps=[bare_step])
        pending_take = beat.new_take(workflow=workflow)
        pending_take.status = "pending"
    outcome = beat.select_primary(strategy="score")
    assert outcome is None


def test_select_primary_score_high_take_index_with_higher_score_wins():
    """Position only matters for ties: the last take (index 2, score 0.99)
    is expected to beat earlier takes scoring 0.5 and 0.6."""
    entries = [(0.5, "succeeded"), (0.6, "succeeded"), (0.99, "succeeded")]
    beat = _beat_with_scored_takes("b1", entries)
    winner = beat.select_primary(strategy="score")
    assert winner == "b1_take_2"


def test_select_primary_score_externally_constructed_beat_with_custom_indices():
    """Takes built by hand with take_index 5 and 2, both scoring 0.7 and
    added in that order: the tie is expected to go to take_index 2, i.e.
    the lower index rather than the first one added."""
    beat = Beat(beat_id="b1")
    # (take_id, take_index, workflow_id) — higher index is added first.
    specs = [
        ("custom_5", 5, "wf_high"),
        ("custom_2", 2, "wf_low"),
    ]
    for take_id, take_index, workflow_id in specs:
        workflow = _make_workflow(with_eval=0.7, workflow_id=workflow_id)
        take = Take(take_id=take_id, take_index=take_index, workflow=workflow)
        take.status = "succeeded"
        beat.add_take(take)
    winner = beat.select_primary(strategy="score")
    assert winner == "custom_2"