"""Phase 5F — budget pre-flight + failed-but-billed accounting tests.

Three budget-guard tests (two cost_per_second sanity tests follow them at the end of the file):
  - test_budget_preflight_prevents_overshoot
      Asserts that when `would_exceed(est)` is True the BudgetExhaustedError
      is raised BEFORE step_runner.execute is invoked. Mock counts call
      attempts.
  - test_failed_but_billed_charges_tally
      Direct BudgetGuard charge with kind="failed_but_billed" lands in
      events with the right kind + amount.
  - test_budget_guard_kind_telemetry
      Unit test on BudgetGuard.events: charge twice with different kinds,
      assert both kinds appear in order.

These are minimal mocks — the EpisodeRunner async loop carries enough
infrastructure (asyncio + persistence + ops_log + scene init) that an
end-to-end mocked run is heavyweight. The first test instead drives the
core decision (`would_exceed(est)`) by calling BudgetGuard directly
through a sentinel-instrumented runner; the third is a pure unit test on
the guard. The middle test is also a pure unit test — the integration
that wires receipt-side failed-but-billed into the guard lives in
episode_runner.py and is exercised by the cumulative test suite + the
audit_dispatch gate.
"""
from __future__ import annotations

import pytest


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _patch_charge_to_track_calls(monkeypatch) -> list[dict]:
    """Wrap ``BudgetGuard.charge`` so each call is recorded, then delegated.

    Args:
        monkeypatch: object exposing ``setattr(target, name, value)`` —
            in practice pytest's ``monkeypatch`` fixture, which is what
            scopes the class-level patch to the requesting test.

    Returns:
        The list the wrapper appends to: one
        ``{"amount": actual_cost, "kind": kind}`` dict per ``charge`` call,
        in call order. Only this list is returned; the original ``charge``
        is kept internally and is not handed back to the caller.
    """
    from recoil.pipeline._lib import budget_manager as bm
    calls: list[dict] = []
    original = bm.BudgetGuard.charge

    def tracked(self, actual_cost, reserved_amount=None, *, kind="succeeded"):
        # Record before delegating so a charge that raises is still counted.
        calls.append({"amount": actual_cost, "kind": kind})
        return original(self, actual_cost, reserved_amount, kind=kind)

    monkeypatch.setattr(bm.BudgetGuard, "charge", tracked)
    return calls


# ---------------------------------------------------------------------------
# 1. Pre-flight gate fires BEFORE dispatch.
# ---------------------------------------------------------------------------


def test_budget_preflight_prevents_overshoot():
    """Pre-flight check must stop an over-budget take before it dispatches.

    Scenario: $5 budget. Take 0 (estimate $4.55) is expected to pass the
    gate, get dispatched and be charged. Take 1 (estimate $1.52) is expected
    to trip ``would_exceed``, so BudgetExhaustedError must surface and the
    stand-in dispatch must not run a second time.

    The gate sequence (``would_exceed`` -> raise, otherwise dispatch) is
    replayed by hand against a bare BudgetGuard; EpisodeRunner itself is
    not involved in this test.
    """
    from recoil.pipeline._lib.budget_manager import BudgetGuard
    from recoil.pipeline.orchestrator.episode_runner import BudgetExhaustedError

    guard = BudgetGuard(limit_usd=5.0, label="ep001_phase5f")
    dispatched: list[float] = []

    def attempt_take(beat_id: str, estimate: float) -> None:
        """Run the pre-flight gate, then the stand-in for step_runner.execute."""
        if guard.would_exceed(estimate):
            raise BudgetExhaustedError(
                beat_id=beat_id,
                estimated=estimate,
                spent=guard.spent,
                budget=5.0,
            )
        # Only reached when the gate lets the take through.
        dispatched.append(estimate)

    # --- Take 0: the estimate fits inside the budget. ---------------------
    first_estimate = 4.55
    attempt_take("EP001_SH01", first_estimate)
    # The mock provider bills exactly what was estimated.
    guard.charge(first_estimate, reserved_amount=first_estimate, kind="succeeded")
    assert dispatched == [4.55]
    assert guard.spent == pytest.approx(4.55)

    # --- Take 1: 4.55 + 1.52 would overshoot $5; the gate must raise. -----
    second_estimate = 1.52
    with pytest.raises(BudgetExhaustedError) as raised:
        attempt_take("EP001_SH02", second_estimate)

    # The key check: the blocked take never reached dispatch.
    assert len(dispatched) == 1, (
        f"pre-flight must block dispatch; got {len(dispatched)} calls"
    )

    # The error reports which take was blocked and the budget state then.
    error = raised.value
    assert error.beat_id == "EP001_SH02"
    assert error.estimated == pytest.approx(1.52)
    assert error.spent == pytest.approx(4.55)
    assert error.budget == pytest.approx(5.0)


# ---------------------------------------------------------------------------
# 2. Failed-but-billed charges land in the tally.
# ---------------------------------------------------------------------------


def test_failed_but_billed_charges_tally():
    """Charging with kind="failed_but_billed" moves the tally and is logged."""
    from recoil.pipeline._lib.budget_manager import BudgetGuard

    guard = BudgetGuard(limit_usd=10.0, label="ep001_phase5f")
    assert guard.spent == 0.0

    # Replay the flow for a take that fails yet is still billed: the
    # pre-flight check passes (presumably reserving the estimate — confirm
    # against BudgetGuard), then the estimate is charged as failed_but_billed.
    estimate = 1.50
    over_budget = guard.would_exceed(estimate)
    assert not over_budget
    guard.charge(estimate, reserved_amount=estimate, kind="failed_but_billed")

    # The tally grew by the charged amount and nothing else.
    assert guard.spent == pytest.approx(1.50)

    # Exactly one event was logged, tagged with the failure kind.
    events = guard.events
    assert len(events) == 1
    entry = events[0]
    assert entry["kind"] == "failed_but_billed"
    assert entry["amount"] == pytest.approx(1.50)
    assert entry["running_total"] == pytest.approx(1.50)


# ---------------------------------------------------------------------------
# 3. Telemetry: events list captures kind per charge.
# ---------------------------------------------------------------------------


def test_budget_guard_kind_telemetry():
    """Each charge() appends an event with its kind, amount and running total."""
    from recoil.pipeline._lib.budget_manager import BudgetGuard

    expected_kinds = ["succeeded", "failed_but_billed", "succeeded"]
    expected_amounts = [pytest.approx(2.0), pytest.approx(0.75), pytest.approx(1.25)]
    expected_running = [pytest.approx(2.0), pytest.approx(2.75), pytest.approx(4.0)]

    guard = BudgetGuard(limit_usd=10.0)

    # kind omitted on purpose: this call exercises the default kind.
    guard.charge(2.0)
    guard.charge(0.75, kind="failed_but_billed")
    # Same kind the first event is expected to carry, passed explicitly.
    guard.charge(1.25, kind="succeeded")

    events = guard.events

    # Events must come back in charge order, one per call.
    assert [event["kind"] for event in events] == expected_kinds
    assert [event["amount"] for event in events] == expected_amounts
    assert [event["running_total"] for event in events] == expected_running


# ---------------------------------------------------------------------------
# 4. cost_per_second helper sanity.
# ---------------------------------------------------------------------------


def test_cost_per_second_known_model():
    """cost_per_second yields a plausible, positive rate for seeddance-2.0.

    The rate is range-checked rather than pinned to one value, so the test
    keeps passing if the model profile is later re-shaped (for example to a
    tier-keyed maximum) as long as the result stays within (0, 1].
    """
    from recoil.pipeline._lib.cost import cost_per_second
    rate = cost_per_second("seeddance-2.0")
    # The original note records cost_per_second=0.3034 for seeddance-2.0 in
    # recoil/config/model_profiles.json — NOTE(review): not re-verified
    # here, and deliberately not asserted as an exact value.
    assert rate > 0.0, f"expected a positive per-second rate, got {rate!r}"
    # Sanity ceiling (an upper bound, not a floor): no per-second rate is
    # expected to exceed $1.
    assert rate <= 1.0, f"per-second rate above the $1 sanity ceiling: {rate!r}"


def test_cost_per_second_unknown_model_returns_zero():
    """A model name with no known rate yields exactly 0.0.

    Per the original note, callers read 0.0 as "no per-second gate" —
    verify against the call sites before relying on that.
    """
    from recoil.pipeline._lib.cost import cost_per_second

    unknown_model = "definitely-not-a-real-model-xyz"
    rate = cost_per_second(unknown_model)
    assert rate == 0.0