"""Tests for pipeline/tools/reclaim_orphans.py (Phase 8)."""

from __future__ import annotations

import json
import sys
from pathlib import Path

# Make `recoil.pipeline.tools` importable no matter where pytest is launched.
# parents[3] is the directory three levels above this file's own directory;
# it is expected to be the one that contains the `recoil` package
# (NOTE(review): confirm against the repo layout if this file is relocated).
# The membership check avoids inserting a duplicate sys.path entry.
_RECOIL_ROOT = Path(__file__).resolve().parents[3]
if str(_RECOIL_ROOT) not in sys.path:
    sys.path.insert(0, str(_RECOIL_ROOT))

from recoil.pipeline.tools import reclaim_orphans as R  # noqa: E402


# ── parse_orphan_filename ────────────────────────────────────────


def test_parse_regen_p02_multishot():
    """A REGEN multishot stem keeps its shot id and yields a pipeline hint."""
    parsed = R.parse_orphan_filename("REGEN_P02_multishot_take1")
    assert (parsed["shot_id"], parsed["take_number"]) == ("REGEN_P02_multishot", 1)
    hints = parsed["hints"]
    assert hints["pipeline"] == "multishot"


def test_parse_shot_prefix_stripped():
    """The leading ``shot_`` prefix is not part of the parsed shot id."""
    result = R.parse_orphan_filename("shot_DEER_MV2_01_take1")
    expected = {"shot_id": "DEER_MV2_01", "take_number": 1}
    for key, want in expected.items():
        assert result[key] == want


def test_parse_versioned_stripped():
    """A ``_v{N}`` marker is removed from the shot id and flagged in hints."""
    info = R.parse_orphan_filename("shot_DEER_V01_01_v2_take1")
    shot_id = info["shot_id"]
    assert shot_id == "DEER_V01_01"
    versioned = info["hints"]["versioned"]
    assert versioned is True


def test_parse_model_hint_seedance():
    """A trailing model token becomes a hint instead of part of the shot id."""
    stem = "shot_OVERNIGHT_SEQ09D_dial_911_cu_seedance_take1"
    parsed = R.parse_orphan_filename(stem)
    assert parsed["shot_id"] == "OVERNIGHT_SEQ09D_dial_911_cu"
    assert (parsed["hints"]["model"], parsed["take_number"]) == ("seedance", 1)


def test_parse_regen_p04_r2v_loose():
    """An ``r2v`` token inside the stem is reported as the pipeline hint."""
    parsed = R.parse_orphan_filename("REGEN_P04_r2v_loose_take1")
    shot_id, hints = parsed["shot_id"], parsed["hints"]
    assert shot_id == "REGEN_P04_r2v_loose"
    assert hints["pipeline"] == "r2v"


def test_parse_take_with_trailing_version_and_model():
    """``_v{N}`` and model suffixes that follow ``_take{N}`` are still parsed.

    Regression: the take regex used to be anchored with ``$`` and ran first,
    so ``shot_123_take1_v2_kling`` silently fell back to take=1 while leaving
    ``_take1_v2`` inside the shot id. The strip order is now
    MODEL → VERSION → TAKE.

    The second stem has no take token at all and must default to take=1 with
    otherwise identical results.
    """
    stems = (
        "shot_123_take1_v2_kling",  # explicit take followed by version + model
        "shot_123_v2_kling",  # no take token: defaults to take 1
    )
    for stem in stems:
        parsed = R.parse_orphan_filename(stem)
        hints = parsed["hints"]
        assert parsed["take_number"] == 1
        assert hints["model"] == "kling"
        assert hints["versioned"] is True
        assert parsed["shot_id"] == "123"


# ── parse_mp4_sidecar ────────────────────────────────────────────


def test_sidecar_populated_returns_fields(tmp_path):
    """A populated sidecar exposes model, prompt, duration and mode."""
    generation_params = {
        "duration": 5,
        "aspect_ratio": "16:9",
        "mode": "image2video",
    }
    provenance = {
        "model": "kling-o3",
        "prompt": "test prompt",
        "cost": 0.63,
        "generation_params": generation_params,
        "shot_id": "X",
        "pipeline": "video",
    }
    payload = {"schema_version": 1, "source": "pipeline", "provenance": provenance}

    sidecar_path = tmp_path / "X.mp4.json"
    sidecar_path.write_text(json.dumps(payload))

    fields = R.parse_mp4_sidecar(sidecar_path)
    assert fields["model"] == "kling-o3"
    assert fields["prompt"] == "test prompt"
    assert fields["duration"] == 5
    assert fields["mode"] == "image2video"


def test_sidecar_stub_returns_empty(tmp_path):
    """A sidecar with an empty provenance mapping parses to ``{}``."""
    stub_path = tmp_path / "Y.mp4.json"
    stub_path.write_text(json.dumps({"provenance": {}}))
    fields = R.parse_mp4_sidecar(stub_path)
    assert fields == {}


def test_sidecar_missing_returns_empty(tmp_path):
    """A sidecar path that does not exist parses to ``{}``."""
    absent_path = tmp_path / "NOPE.mp4.json"
    fields = R.parse_mp4_sidecar(absent_path)
    assert fields == {}


# ── classify_confidence ──────────────────────────────────────────


def test_confidence_high_with_sidecar():
    """Sidecar model + prompt together with ffprobe data rate as ``high``."""
    sidecar_bits = {"model": "kling-o3", "prompt": "x"}
    ffprobe_bits = {"duration": 5}
    parsed = {"shot_id": "X", "take_number": 1, "hints": {}}
    level = R.classify_confidence(sidecar_bits, ffprobe_bits, parsed)
    assert level == "high"


def test_confidence_medium_with_filename_and_ffprobe():
    """No sidecar data but an ffprobe duration rates as ``medium``."""
    filename_bits = {"shot_id": "X", "take_number": 1, "hints": {}}
    level = R.classify_confidence({}, {"duration": 5}, filename_bits)
    assert level == "medium"


def test_confidence_low_without_ffprobe():
    """With neither sidecar nor ffprobe data the rating is ``low``."""
    filename_bits = {"shot_id": "X", "take_number": 1, "hints": {}}
    level = R.classify_confidence({}, {}, filename_bits)
    assert level == "low"


# ── build_synthetic_meta ─────────────────────────────────────────


def test_build_synthetic_meta_has_reclaim_flag(tmp_path):
    """Synthetic meta carries the reclaim marker plus merged generation data."""
    clip = tmp_path / "foo_take1.mp4"
    clip.write_bytes(b"\x00")  # stub file on disk (needed for mtime)

    filename_bits = {
        "shot_id": "foo",
        "take_number": 1,
        "hints": {"model": "seedance", "pipeline": "i2v", "versioned": False},
    }
    sidecar_bits = {"model": "seedance-2.0", "prompt": "hello", "cost": 1.0}
    ffprobe_bits = {"duration": 6, "aspect_ratio": "16:9"}

    meta = R.build_synthetic_meta(
        clip,
        filename_bits,
        sidecar_bits=sidecar_bits,
        ffprobe_bits=ffprobe_bits,
        confidence="high",
    )

    reclaim = meta["reclaim"]
    assert reclaim["synthetic"] is True
    assert reclaim["confidence"] == "high"

    generation = meta["generation"]
    assert generation["model"] == "seedance-2.0"
    assert generation["parameters"]["duration"] == 6
    assert generation["prompt_word_count"] == 1


# ── dry-run vs apply ─────────────────────────────────────────────


def _setup_fake_orphans(tmp_path, monkeypatch):
    """Build a throwaway microdrama project holding one orphaned take.

    Files created under ``tmp_path``::

        projects/.recoil-data-root
        projects/test-microdrama/project_config.json
        projects/test-microdrama/renders/ep_001/_orphans/
            REGEN_P02_multishot_take1.mp4
            REGEN_P02_multishot_take1.mp4.json

    The environment variable ``RECOIL_PROJECTS_ROOT`` is pointed at the fake
    ``projects`` directory and ``R.run_ffprobe`` is replaced with a canned
    result so the tests do not need ffmpeg on the build host.

    Args:
        tmp_path: pytest ``tmp_path`` fixture (per-test temporary directory).
        monkeypatch: pytest ``monkeypatch`` fixture.

    Returns:
        Tuple ``(projects, orphans_dir, video, sidecar)`` of paths.
    """
    projects = tmp_path / "projects"
    project_dir = projects / "test-microdrama"
    orphans_dir = project_dir / "renders" / "ep_001" / "_orphans"
    orphans_dir.mkdir(parents=True)
    (projects / ".recoil-data-root").touch()  # paths-refactor-v2 sentinel (harness pre-flight infra fix)
    # project_config.json with mode "microdrama" so the mode guard passes
    (project_dir / "project_config.json").write_text('{"mode": "microdrama"}')

    video = orphans_dir / "REGEN_P02_multishot_take1.mp4"
    video.write_bytes(b"\x00" * 16)

    sidecar = orphans_dir / "REGEN_P02_multishot_take1.mp4.json"
    sidecar.write_text(
        json.dumps(
            {
                "provenance": {
                    # NOTE(review): spelled "seeddance" here but "seedance-2.0"
                    # elsewhere in this file — confirm whether intentional.
                    "model": "seeddance-2.0",
                    "prompt": "multi-shot test",
                    "cost": 1.2,
                    "generation_params": {
                        "duration": 9,
                        "aspect_ratio": "16:9",
                        "mode": "multishot",
                    },
                    "shot_id": "REGEN_P02_multishot",
                }
            }
        )
    )

    # A single env override is all that is applied here; the previous code
    # repeated this exact call three times, which had no additional effect.
    monkeypatch.setenv("RECOIL_PROJECTS_ROOT", str(projects))
    # Stub ffprobe — don't require ffmpeg on the build host
    monkeypatch.setattr(
        R, "run_ffprobe", lambda v: {"duration": 9, "aspect_ratio": "16:9"}
    )
    return projects, orphans_dir, video, sidecar


def test_dry_run_does_not_move_or_write(tmp_path, monkeypatch):
    """``--dry-run`` must leave the orphan in place and create nothing.

    Besides checking that the source video and sidecar still exist, this also
    checks that no copy of the video appeared at the reclaim destination: a
    dry run that *copied* instead of moved would otherwise go unnoticed.
    """
    _projects, orphans_dir, video, sidecar = _setup_fake_orphans(
        tmp_path, monkeypatch
    )
    rc = R.main(
        ["--project", "test-microdrama", "--dry-run", "--confidence-min", "medium"]
    )
    assert rc == 0

    # Sources untouched.
    assert video.exists()
    assert sidecar.exists()

    # Nothing materialized at the destination (the episode directory).
    episode_dir = orphans_dir.parent
    assert not (episode_dir / video.name).exists()
    assert not (episode_dir / "REGEN_P02_multishot_meta.yaml").exists()


def test_apply_moves_files_and_writes_meta(tmp_path, monkeypatch):
    """``--apply`` relocates the video, writes synthetic meta and a log."""
    projects, orphans_dir, video, sidecar = _setup_fake_orphans(
        tmp_path, monkeypatch
    )
    argv = ["--project", "test-microdrama", "--apply", "--confidence-min", "medium"]
    assert R.main(argv) == 0

    episode_dir = orphans_dir.parent

    # The video has left _orphans/ and now sits in the episode directory.
    assert not video.exists()
    assert (episode_dir / video.name).exists()

    # A meta.yaml was written next to it and carries the reclaim flag.
    import yaml

    meta_file = episode_dir / "REGEN_P02_multishot_meta.yaml"
    assert meta_file.exists()
    reclaim = yaml.safe_load(meta_file.read_text())["reclaim"]
    assert reclaim["synthetic"] is True
    assert reclaim["confidence"] == "high"

    # The reclaim log was created under renders/.
    reclaim_log = projects / "test-microdrama" / "renders" / "_reclaim_log.jsonl"
    assert reclaim_log.exists()


def test_idempotent_rerun_no_op(tmp_path, monkeypatch):
    """A second ``--apply`` succeeds and leaves the first run's output alone.

    The first run's return code is checked too, so a failing first apply can
    no longer be masked by a trivially successful second one. After the
    second run the reclaimed video must still be present and the meta file
    must be byte-for-byte unchanged.
    """
    _projects, orphans_dir, video, _sidecar = _setup_fake_orphans(
        tmp_path, monkeypatch
    )
    argv = ("--project", "test-microdrama", "--apply", "--confidence-min", "medium")
    episode_dir = orphans_dir.parent
    moved_video = episode_dir / video.name
    meta_path = episode_dir / "REGEN_P02_multishot_meta.yaml"

    # First apply does the actual reclaim.
    assert R.main(list(argv)) == 0
    assert moved_video.exists()
    assert meta_path.exists()
    meta_before = meta_path.read_bytes()

    # Second apply — should be a no-op (no files in _orphans/ anymore)
    assert R.main(list(argv)) == 0
    assert moved_video.exists()
    assert meta_path.read_bytes() == meta_before


def test_multi_take_no_collision(tmp_path, monkeypatch):
    """Two takes of the same shot_id must each get their own meta.yaml.

    Regression: meta_target was named `{shot_id}_meta.yaml` regardless of
    take_number, so take2 silently overwrote take1's synthetic meta. The
    idempotence guard only protects NON-synthetic metas, so synthetic-on-
    synthetic collisions slipped through.
    """
    projects = tmp_path / "projects"
    project_dir = projects / "test-microdrama"
    orphans_dir = project_dir / "renders" / "ep_001" / "_orphans"
    orphans_dir.mkdir(parents=True)
    (projects / ".recoil-data-root").touch()  # paths-refactor-v2 sentinel (harness pre-flight infra fix)
    (project_dir / "project_config.json").write_text('{"mode": "microdrama"}')

    # Two takes of the same shot_id, distinguishable by their prompt text.
    for take_number, prompt in ((1, "first take"), (2, "second take")):
        stem = f"REGEN_P02_multishot_take{take_number}"
        (orphans_dir / f"{stem}.mp4").write_bytes(b"\x00" * 16)
        (orphans_dir / f"{stem}.mp4.json").write_text(
            json.dumps(
                {
                    "provenance": {
                        "model": "seeddance-2.0",
                        "prompt": prompt,
                        "cost": 1.2,
                        "generation_params": {
                            "duration": 9,
                            "aspect_ratio": "16:9",
                            "mode": "multishot",
                        },
                        "shot_id": "REGEN_P02_multishot",
                    }
                }
            )
        )

    # One env override is sufficient; the earlier triple call was redundant.
    monkeypatch.setenv("RECOIL_PROJECTS_ROOT", str(projects))
    # Stub ffprobe so ffmpeg is not needed to run this test.
    monkeypatch.setattr(
        R, "run_ffprobe", lambda v: {"duration": 9, "aspect_ratio": "16:9"}
    )

    rc = R.main(
        ["--project", "test-microdrama", "--apply", "--confidence-min", "medium"]
    )
    assert rc == 0

    # Both meta.yaml files must exist — take1 keeps canonical name,
    # take2 disambiguates with _take2 suffix.
    episode_dir = orphans_dir.parent
    take1_meta = episode_dir / "REGEN_P02_multishot_meta.yaml"
    take2_meta = episode_dir / "REGEN_P02_multishot_take2_meta.yaml"
    assert take1_meta.exists(), "take1 meta.yaml missing"
    assert take2_meta.exists(), "take2 meta.yaml missing — collision regressed"

    # Verify their prompts match the source takes (no overwrite happened)
    import yaml

    m1 = yaml.safe_load(take1_meta.read_text())
    m2 = yaml.safe_load(take2_meta.read_text())
    assert m1["generation"]["prompt_text"] == "first take"
    assert m2["generation"]["prompt_text"] == "second take"
    # And take_number must be preserved on each meta
    assert m1["reclaim"]["take_number"] == 1
    assert m2["reclaim"]["take_number"] == 2


# ── mode guard regression test ───────────────────────────────────


def test_client_deliverable_rejected_by_mode_guard(tmp_path, monkeypatch):
    """Mode guard must hard-error on client_deliverable BEFORE any file ops.

    Defense-in-depth: a future refactor that moves the guard below
    project_root.iterdir() would silently break the safety contract this
    test pins down.
    """
    projects = tmp_path / "projects"
    project_dir = projects / "client-proj"
    orphans_dir = project_dir / "renders" / "ep_001" / "_orphans"
    orphans_dir.mkdir(parents=True)
    (projects / ".recoil-data-root").touch()  # paths-refactor-v2 sentinel (harness pre-flight infra fix)
    (project_dir / "project_config.json").write_text(
        '{"mode": "client_deliverable"}'
    )
    video = orphans_dir / "REGEN_P02_take1.mp4"
    video.write_bytes(b"\x00" * 16)

    # One env override is sufficient; the earlier triple call was redundant.
    monkeypatch.setenv("RECOIL_PROJECTS_ROOT", str(projects))

    rc = R.main(["--project", "client-proj", "--dry-run"])
    assert rc == 1
    assert video.exists(), "mode guard must fire BEFORE any file ops"
