"""Tests for derivation_sha (structural vs content fingerprints) + atomic _save_json.

The load-bearing property: a content-only re-roll (new prose, same shots/boundaries/
locations/grouping-inputs) keeps plan_structural_sha IDENTICAL but bumps content_sha —
so approved boards/passes survive a creative re-roll, but any structural change to the
plan (shot_type, env-only, duration, location, scene_index, character membership,
camera_side, shot count, sublocation) invalidates the cached grouping/passes.
"""
from __future__ import annotations

import copy
import json
import os

import pytest

from recoil.pipeline._lib.derivation_sha import (
    GROUPING_PARAM_VERSION,
    board_content_freshness_sha,
    content_sha,
    plan_structural_sha,
    shotset_hash,
)
from recoil.pipeline.orchestrator.ingest_pipeline import IngestPipeline


def _make_plan() -> dict:
    """A minimal 2-shot EpisodePlan dict with the real nested ShotRecord shape
    (routing_data / prompt_data / asset_data / spatial_data). Shot 0 carries two
    characters so character-order-insensitivity is testable."""
    return {
        "episode_id": "EP001",
        "project": "tartarus",
        "total_shots": 2,
        "shots": [
            {
                "shot_id": "EP001_SH01",
                "scene_index": 1,
                "source_text": "INT. SALVAGE BAY - NIGHT. JADE pries the loose panel free.",
                "routing_data": {
                    "target_editorial_duration_s": 6,
                    "is_env_only": False,
                    "num_characters": 2,
                },
                "prompt_data": {
                    "shot_type": "MS",
                    "kinetic_action": "hand wrenches the panel free",
                    "prompt_skeleton": {
                        "subject_line": "JADE crouched, both hands gripping a loose panel",
                        "environment_line": "cramped salvage bay, emergency strip light",
                        "action_line": "panel torques under her grip",
                        "emotion_line": "grim focus",
                    },
                },
                "spatial_data": {"camera_side": "A"},
                "asset_data": {
                    "location_id": "salvage_bay",
                    "characters": [
                        {"char_id": "JADE", "wardrobe_phase_id": "ph1"},
                        {"char_id": "KETCH", "wardrobe_phase_id": "ph1"},
                    ],
                },
            },
            {
                "shot_id": "EP001_SH02",
                "scene_index": 1,
                "source_text": "WREN watches from the hatch.",
                "routing_data": {
                    "target_editorial_duration_s": 4,
                    "is_env_only": False,
                    "num_characters": 1,
                },
                "prompt_data": {
                    "shot_type": "CU",
                    "kinetic_action": "eyes flick to the readout",
                    "prompt_skeleton": {
                        "subject_line": "WREN at the hatch, half-lit",
                        "environment_line": "hatch frame, cold key light",
                        "action_line": "a slow blink",
                        "emotion_line": "wary",
                    },
                },
                "spatial_data": {"camera_side": "B"},
                "asset_data": {
                    "location_id": "salvage_bay",
                    "characters": [
                        {"char_id": "WREN", "wardrobe_phase_id": "ph1"},
                    ],
                },
            },
        ],
    }


def test_content_only_reroll_preserves_structural_sha():
    """LOAD-BEARING: a re-roll that touches ONLY creative/content fields must leave
    plan_structural_sha untouched while content_sha moves."""
    original = _make_plan()
    structural_before = plan_structural_sha(original)
    content_before = content_sha(original)

    rerolled = copy.deepcopy(original)
    first_shot = rerolled["shots"][0]
    prompt_data = first_shot["prompt_data"]

    # Rewrite three prose-only fields: the top-level source_text, the skeleton's
    # subject_line, and the kinetic_action camera prose.
    first_shot["source_text"] = "A wholly re-rolled line of creative prose."
    prompt_data["prompt_skeleton"]["subject_line"] = (
        "JADE, the very same blocking but reframed creatively"
    )
    prompt_data["kinetic_action"] = "an entirely different camera-prose beat"

    # The structural fingerprint survives the re-roll...
    assert plan_structural_sha(rerolled) == structural_before
    # ...while the content fingerprint reflects the new prose.
    assert content_sha(rerolled) != content_before


def test_board_content_freshness_helper():
    """board_content_freshness_sha ignores dict insertion order but reacts to a
    changed span value."""
    spans = {"EP001_SH02": "span-b", "EP001_SH01": "span-a"}
    # Same key/value pairs, inserted in sorted (SH01, SH02) order.
    reordered = dict(sorted(spans.items()))
    # Same keys as `reordered`, but SH02 carries a different span value.
    changed = {**reordered, "EP001_SH02": "span-c"}

    baseline = board_content_freshness_sha(spans)
    assert baseline == board_content_freshness_sha(reordered)
    assert baseline != board_content_freshness_sha(changed)


def test_structural_change_bumps_structural_sha():
    """Every structural edit must change plan_structural_sha; merely reordering a
    shot's characters must not."""
    original = _make_plan()
    original_sha = plan_structural_sha(original)

    # (label, in-place mutation). Each mutation is applied to its own deep copy of
    # the plan, so the edits never interact with one another.
    structural_edits = [
        (
            "shot_type MS -> ECU",
            lambda p: p["shots"][0]["prompt_data"].update(shot_type="ECU"),
        ),
        (
            "is_env_only flip",
            lambda p: p["shots"][0]["routing_data"].update(is_env_only=True),
        ),
        (
            "target_editorial_duration_s +5",
            lambda p: p["shots"][0]["routing_data"].update(target_editorial_duration_s=11),
        ),
        (
            "asset_data.location_id",
            lambda p: p["shots"][0]["asset_data"].update(location_id="engine_room"),
        ),
        (
            "scene_index",
            lambda p: p["shots"][0].update(scene_index=2),
        ),
        (
            "character membership swap JADE -> WREN (set changes)",
            lambda p: p["shots"][0]["asset_data"]["characters"][0].update(char_id="WREN"),
        ),
        (
            "spatial_data.camera_side A -> B",
            lambda p: p["shots"][0]["spatial_data"].update(camera_side="B"),
        ),
        (
            "drop a shot (count change)",
            lambda p: p["shots"].pop(),
        ),
    ]
    for label, apply_edit in structural_edits:
        candidate = copy.deepcopy(original)
        apply_edit(candidate)
        assert plan_structural_sha(candidate) != original_sha, label

    # Character ORDER must NOT matter: reversing the list keeps the same sha.
    shuffled = copy.deepcopy(original)
    cast = shuffled["shots"][0]["asset_data"]["characters"]
    cast[:] = cast[::-1]
    assert plan_structural_sha(shuffled) == original_sha


def test_sublocation_forward_compat():
    """Introducing a sublocation bumps structural_sha (D6 invalidation), whereas a
    plan with no sublocation anywhere keeps hashing to the same value."""
    reference = _make_plan()
    reference_sha = plan_structural_sha(reference)

    # With the field absent everywhere, an untouched copy hashes identically.
    untouched = copy.deepcopy(reference)
    assert plan_structural_sha(untouched) == reference_sha

    # Forward-compat field set directly on the shot record.
    per_shot = copy.deepcopy(reference)
    per_shot["shots"][0].update(sublocation_id="bay_aft")
    assert plan_structural_sha(per_shot) != reference_sha

    # Same idea through the spatial_data.sublocation fallback key.
    via_spatial = copy.deepcopy(reference)
    via_spatial["shots"][0]["spatial_data"].update(sublocation="bay_aft")
    assert plan_structural_sha(via_spatial) != reference_sha


def _save_json(path, data) -> None:
    """Call the real IngestPipeline._save_json as an unbound function.

    The IngestPipeline constructor is skipped on purpose (it is heavy and creates
    real per-project state dirs). Because the method does not read instance state,
    a bare placeholder object stands in for `self` and the same atomic-write body
    runs.
    """
    placeholder_self = object()
    IngestPipeline._save_json(placeholder_self, path, data)


def test_atomic_save_json(tmp_path, monkeypatch):
    """Success writes a parseable JSON file; when os.replace fails, nothing is left
    at the target path and the temporary file is removed."""
    payload = {"episode_id": "EP001", "shots": [1, 2, 3]}

    # --- success path: the parent directory "out" does not exist beforehand ---
    ok_path = tmp_path / "out" / "artifact.json"
    _save_json(ok_path, payload)
    assert ok_path.exists()
    round_tripped = json.loads(ok_path.read_text())
    assert round_tripped == payload

    # --- failure path: make os.replace blow up for the remainder of the test ---
    def _raise_on_replace(*_args, **_kwargs):
        raise RuntimeError("simulated os.replace failure")

    monkeypatch.setattr(os, "replace", _raise_on_replace)

    broken_dir = tmp_path / "fail"
    broken_path = broken_dir / "artifact.json"
    with pytest.raises(RuntimeError):
        _save_json(broken_path, payload)

    # No partial artifact at the target path...
    assert not broken_path.exists()
    # ...and the tmp file was unlinked, so the directory is empty.
    assert not any(broken_dir.iterdir())


def test_grouping_param_version_is_hashed(monkeypatch):
    """GROUPING_PARAM_VERSION participates in the structural fingerprint, so a heuristic
    bump invalidates cached grouping even with identical plan shots.

    The previous version of this test only checked determinism and the constant's
    type, which would pass even if the version were never hashed. It now bumps the
    constant and requires the fingerprint to move.
    """
    plan = _make_plan()
    base = plan_structural_sha(plan)
    # Sanity: the constant is exported and an int (consumed by the manifest in later phases).
    assert isinstance(GROUPING_PARAM_VERSION, int)
    # Recomputing over an identical plan yields the same sha deterministically.
    assert plan_structural_sha(copy.deepcopy(plan)) == base

    # Bump the constant where plan_structural_sha lives and re-hash the SAME plan.
    # NOTE(review): assumes plan_structural_sha reads the module-level global at call
    # time (not a value captured at import/definition time) — confirm against
    # derivation_sha. monkeypatch restores the original value after the test.
    monkeypatch.setattr(
        "recoil.pipeline._lib.derivation_sha.GROUPING_PARAM_VERSION",
        GROUPING_PARAM_VERSION + 1,
    )
    assert plan_structural_sha(plan) != base


def test_shotset_hash_order_independent():
    """The same ids supplied in a different order produce the same hash."""
    shuffled = shotset_hash(["C", "A", "B"])
    ordered = shotset_hash(["A", "B", "C"])
    assert shuffled == ordered


def test_shotset_hash_dedupes():
    """A repeated id does not change the hash."""
    with_duplicate = shotset_hash(["A", "A", "B"])
    without_duplicate = shotset_hash(["A", "B"])
    assert with_duplicate == without_duplicate


def test_shotset_hash_distinguishes_sets():
    """Adding a new id to the set yields a different hash."""
    smaller = shotset_hash(["A", "B"])
    larger = shotset_hash(["A", "B", "C"])
    assert smaller != larger


def test_shotset_hash_excludes_wildcard():
    """The literal id "wildcard" contributes nothing to the hash."""
    with_wildcard = shotset_hash(["A", "wildcard", "B"])
    plain = shotset_hash(["A", "B"])
    assert with_wildcard == plain


def test_shotset_hash_prefix():
    """The returned hash string starts with the "sha256:" prefix."""
    digest = shotset_hash(["A", "B"])
    assert digest.startswith("sha256:")


def test_shotset_hash_empty():
    """An input holding only "wildcard", an empty string and None hashes the same as
    an empty list."""
    empty = shotset_hash([])
    only_ignored = shotset_hash(["wildcard", "", None])
    assert empty == only_ignored
