"""REC-235 Phase 0 — atom-version identity URNs + Take.provenance.

Behavioral tests for the beat-grain atom-version addressing scheme added to
``recoil/pipeline/core/take.py``: the pure URN builder/parser helpers
(``atom_urn`` / ``atom_version_urn`` / ``parse_atom_urn`` /
``parse_atom_version_urn``, one shared token grammar) and the additive,
legacy-safe ``Take.provenance`` field. Also pins the append-only invariant:
appending a Take never mutates a prior atom-version's serialized body.
"""
import json
import sys
import pathlib

sys.path.insert(0, str(pathlib.Path(__file__).resolve().parent.parent.parent.parent))
from recoil.core.paths import ensure_pipeline_importable  # noqa: E402
ensure_pipeline_importable()

import pytest  # noqa: E402

from recoil.pipeline.core.take import (  # noqa: E402
    Beat,
    Take,
    atom_urn,
    atom_version_urn,
    parse_atom_urn,
    parse_atom_version_urn,
)
from recoil.pipeline.core.workflow import Workflow  # noqa: E402


def _wf(workflow_id: str = "wf0") -> Workflow:
    """Build a step-less Workflow fixture identified by ``workflow_id``.

    These tests only need a Workflow to attach to a Take, so the step list is
    left empty (the fixture relies on that being accepted at construction).
    """
    no_steps = []
    return Workflow(steps=no_steps, workflow_id=workflow_id)


# ── URN builder/parser round-trips ─────────────────────────────────────


def test_atom_version_urn_build_and_round_trip():
    """The builder emits the expected atom-version URN and the parser inverts it."""
    urn = "atom:EP001/beat/EP001_SH02@t12"
    parts = ("EP001", "EP001_SH02", 12)

    # Build direction: components -> URN string.
    assert atom_version_urn(*parts) == urn
    # Parse direction: URN string -> the same components (take index as int).
    assert parse_atom_version_urn(urn) == parts


def test_parse_atom_version_urn_rejects_missing_take_segment():
    """The atom-version parser raises ``ValueError`` on a URN with no ``@tN`` segment."""
    # Beat-grain URN: valid for parse_atom_urn, but it names no take, so the
    # atom-version parser must fail loudly.
    beat_grain_urn = "atom:EP001/beat/EP001_SH02"
    with pytest.raises(ValueError):
        parse_atom_version_urn(beat_grain_urn)


def test_atom_version_urn_rejects_negative_take_index():
    """The atom-version builder raises ``ValueError`` for a negative take index."""
    negative_index = -1
    with pytest.raises(ValueError):
        atom_version_urn("EP001", "EP001_SH02", negative_index)


def test_parse_atom_urn_beat_grain_round_trip():
    """The beat-grain builder and parser agree on the same URN in both directions."""
    urn = "atom:EP001/beat/EP001_SH02"
    parts = ("EP001", "EP001_SH02")

    # Build direction: components -> URN string.
    assert atom_urn(*parts) == urn
    # Parse direction: URN string -> the same components.
    assert parse_atom_urn(urn) == parts


def test_parse_atom_urn_rejects_version_urn_and_malformed():
    """The beat-grain parser raises ``ValueError`` for non-beat-grain input."""
    rejected_inputs = (
        # An atom-VERSION URN (carries @tN) is NOT a beat-grain URN.
        "atom:EP001/beat/EP001_SH02@t12",
        # Garbage input.
        "not-a-urn",
    )
    # Checked in order; the first input that fails to raise fails the test.
    for candidate in rejected_inputs:
        with pytest.raises(ValueError):
            parse_atom_urn(candidate)


def test_builder_and_parser_grammar_agree():
    """Both builders raise ``ValueError`` for tokens outside ``[A-Za-z0-9_]+``.

    The builders validate against the SAME grammar the parsers enforce, so a
    builder cannot emit a URN its own parser would reject. A hyphen and a
    space are both outside that grammar.
    """
    # (first token, second token) pairs. The first pair is the original case
    # with both tokens invalid; the other two put a single bad token in each
    # position so a builder that validates only one position cannot pass.
    bad_pairs = (
        ("EP-001", "A B"),
        ("EP-001", "EP001_SH02"),
        ("EP001", "A B"),
    )
    for first, second in bad_pairs:
        with pytest.raises(ValueError):
            atom_urn(first, second)
        with pytest.raises(ValueError):
            atom_version_urn(first, second, 0)


# ── Take.provenance — additive, legacy-safe round-trip ─────────────────


def test_provenance_round_trips_and_batch_id_is_provenance_only():
    """Provenance survives ``to_dict``/``from_dict`` and stays under its own key."""
    expected = {"batch_id": "b_abc", "scene_id": "ep001_sc02"}
    take = Take(
        take_id="EP001_SH02_take_0",
        take_index=0,
        workflow=_wf(),
        # Pass a copy so the comparisons below never alias the constructor input.
        provenance=dict(expected),
    )

    serialized = take.to_dict()
    # batch_id is ORIGIN/provenance — it is NOT promoted to a top-level identity field.
    assert "batch_id" not in serialized
    assert serialized["provenance"] == expected

    restored = Take.from_dict(serialized)
    assert restored.provenance == expected


def test_legacy_take_dict_without_provenance_loads_empty():
    """A take dict lacking the ``"provenance"`` key loads with ``provenance == {}``."""
    payload = Take(
        take_id="EP001_SH02_take_0",
        take_index=0,
        workflow=_wf(),
    ).to_dict()
    # Simulate a pre-REC-235 take dict by stripping the key from a fresh dump.
    del payload["provenance"]

    assert Take.from_dict(payload).provenance == {}


def test_provenance_must_be_a_dict():
    """Constructing a Take with a non-dict ``provenance`` raises ``TypeError``."""
    not_a_dict = ["not", "a", "dict"]
    with pytest.raises(TypeError):
        Take(
            take_id="t0",
            take_index=0,
            workflow=_wf(),
            provenance=not_a_dict,
        )


# ── Append-only invariant ──────────────────────────────────────────────


def test_append_only_new_take_does_not_mutate_prior_take():
    """Appending a third take leaves the first take's serialized body unchanged."""

    def _canonical(take):
        # Key-sorted JSON gives a stable text form to compare before and after.
        return json.dumps(take.to_dict(), sort_keys=True)

    beat = Beat(beat_id="EP001_SH02")
    first_take = beat.new_take(workflow=_wf("wf0"))
    beat.new_take(workflow=_wf("wf1"))

    # The snapshot is taken with two takes already on the beat.
    before = _canonical(first_take)
    beat.new_take(workflow=_wf("wf2"))
    # Appending a third take must not mutate the first take's serialized body.
    assert _canonical(first_take) == before
