"""Report-only grouping drift reconciler.

Scans scene receipts, on-disk videos, adjacent sidecars, and grouping collapse
signatures. This module intentionally does not mutate files.
"""

from __future__ import annotations

import argparse
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Iterable

from recoil.core.naming import parse_filename
from recoil.core.paths import projects_root


@dataclass(frozen=True)
class DriftFinding:
    """A single report-only drift observation.

    ``kind`` is a short category label, ``path`` the file or directory the
    finding is attached to, ``message`` a human-readable summary, and
    ``details`` optional structured context (empty by default).
    """

    kind: str
    path: str
    message: str
    details: dict[str, Any] = field(default_factory=dict)

    def format(self) -> str:
        """Render the finding as ``kind: path: message``.

        When ``details`` is non-empty it is appended, separated by one
        space, as JSON with sorted keys.
        """
        headline = f"{self.kind}: {self.path}: {self.message}"
        if not self.details:
            return headline
        encoded = json.dumps(self.details, sort_keys=True)
        return f"{headline} {encoded}"


@dataclass(frozen=True)
class _SceneOutput:
    """One step output path harvested from a scene receipt document."""

    scene_path: Path  # scene JSON file the output was read from
    scene_id: str | None  # stringified "scene_id"; None when missing or falsy
    beat_id: str | None  # stringified "beat_id"; None when missing or falsy
    output_path: Path  # recorded output path resolved against the project root
    raw_output_path: str  # output path string exactly as recorded in the receipt
    grouping: dict[str, Any] | None  # beat_metadata["grouping"]; None unless a dict


def _episode_number(episode: int | str) -> int:
    """Convert an episode identifier to its integer number.

    Accepted forms are an ``int`` (returned unchanged), a bare number such
    as ``"7"``, and an ``ep``-prefixed form in any letter case with an
    optional single underscore: ``"ep_007"``, ``"EP007"``, ``"ep007"``,
    ``"EP_007"``. Surrounding whitespace is ignored.

    Raises:
        ValueError: if no integer can be extracted from *episode*.
    """
    if isinstance(episode, int):
        return episode
    text = str(episode).strip()
    digits = text
    if text[:2].lower() == "ep":
        # Drop the prefix and at most one separating underscore.
        digits = text[2:]
        if digits.startswith("_"):
            digits = digits[1:]
    try:
        return int(digits)
    except ValueError:
        raise ValueError(f"unrecognized episode identifier: {episode!r}") from None


def _episode_slug(episode: int | str) -> str:
    """Return the ``ep_NNN`` slug for *episode*, zero-padded to three digits."""
    number = _episode_number(episode)
    return f"ep_{number:03d}"


def _project_root(project: str) -> Path:
    """Return the directory of *project* under the projects root.

    The project name is lower-cased before being joined to the root.
    """
    root = projects_root()
    return root / project.lower()


def _scene_dir(project_root: Path) -> Path:
    """Return the orchestration scene directory inside *project_root*."""
    return project_root.joinpath("_pipeline", "state", "orchestration", "scenes")


def _video_dir(project_root: Path, episode: int) -> Path:
    """Return the ``renders/ep_NNN`` directory for an episode number."""
    episode_folder = f"ep_{episode:03d}"
    return project_root / "renders" / episode_folder


def _resolve_artifact_path(raw: str, project_root: Path) -> Path:
    """Turn a recorded artifact path string into a path.

    An absolute path is returned unchanged; a relative one is anchored at
    *project_root*.
    """
    candidate = Path(raw)
    if candidate.is_absolute():
        return candidate
    return project_root / candidate


def _same_path(a: Path, b: Path) -> bool:
    """Report whether two paths denote the same location.

    Both sides are user-expanded and resolved non-strictly before the
    comparison, so neither path has to exist.
    """
    left, right = (p.expanduser().resolve(strict=False) for p in (a, b))
    return left == right


def _path_matches(raw: str, actual: Path, project_root: Path, sidecar_dir: Path) -> bool:
    """Check whether a recorded path string refers to *actual*.

    An absolute *raw* is compared directly. A relative one is tried against
    both *project_root* and *sidecar_dir*; matching either is enough.
    """
    recorded = Path(raw)
    if recorded.is_absolute():
        return _same_path(recorded, actual)
    for base in (project_root, sidecar_dir):
        if _same_path(base / recorded, actual):
            return True
    return False


def _json_files_for_episode(project_root: Path, episode_slug: str) -> list[Path]:
    """List the scene JSON files of one episode in sorted order.

    Only regular files directly inside the scene directory whose suffix is
    ``.json`` and whose name starts with ``<episode_slug>_`` are returned.
    A missing scene directory yields an empty list.
    """
    scenes = _scene_dir(project_root)
    if not scenes.is_dir():
        return []
    prefix = f"{episode_slug}_"
    matches = [
        entry
        for entry in scenes.iterdir()
        if entry.is_file() and entry.suffix == ".json" and entry.name.startswith(prefix)
    ]
    matches.sort()
    return matches


def _load_json(path: Path) -> dict[str, Any] | None:
    """Read *path* as UTF-8 JSON and return it only when it is an object.

    Returns ``None`` when the file cannot be read, is not valid UTF-8, is
    not valid JSON, or decodes to anything other than a dict.
    """
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so a badly encoded file is skipped instead of aborting the scan.
        return None
    return raw if isinstance(raw, dict) else None


def _iter_scene_outputs(
    scene_path: Path,
    scene: dict[str, Any],
    project_root: Path,
) -> Iterable[_SceneOutput]:
    """Yield every recorded step output found in one scene document.

    Walks ``beats -> takes -> workflow.steps`` and emits a ``_SceneOutput``
    for each step whose ``receipt.run_result.output_path`` is truthy.
    Entries that are not dicts are skipped at every level. The beat's
    ``beat_metadata.grouping`` is attached only when it is a dict.
    """

    def as_mapping(value: Any) -> dict[str, Any]:
        # Anything that is not a dict is treated as "nothing recorded".
        return value if isinstance(value, dict) else {}

    scene_id = scene.get("scene_id")
    for beat in scene.get("beats") or []:
        if not isinstance(beat, dict):
            continue
        beat_id = beat.get("beat_id")
        beat_grouping = as_mapping(beat.get("beat_metadata")).get("grouping")
        if not isinstance(beat_grouping, dict):
            beat_grouping = None

        for take in beat.get("takes") or []:
            if not isinstance(take, dict):
                continue
            steps = as_mapping(take.get("workflow")).get("steps") or []
            for step in steps:
                if not isinstance(step, dict):
                    continue
                run_result = as_mapping(as_mapping(step.get("receipt")).get("run_result"))
                recorded = run_result.get("output_path")
                if not recorded:
                    continue
                recorded_text = str(recorded)
                yield _SceneOutput(
                    scene_path=scene_path,
                    scene_id=str(scene_id) if scene_id else None,
                    beat_id=str(beat_id) if beat_id else None,
                    output_path=_resolve_artifact_path(recorded_text, project_root),
                    raw_output_path=recorded_text,
                    grouping=beat_grouping,
                )


def _sidecar_path(video_path: Path) -> Path:
    """Return the sidecar path: *video_path* with ``.json`` added after its suffix."""
    sidecar_suffix = video_path.suffix + ".json"
    return video_path.with_suffix(sidecar_suffix)


def _grouping_signature(grouping: dict[str, Any] | None) -> tuple[str, int] | None:
    """Return the ``(strategy, ordinal)`` signature of a grouping dict.

    Returns ``None`` when *grouping* is not a dict, when ``strategy`` is
    missing or falsy, when ``ordinal`` is missing or ``None``, or when the
    ordinal cannot be converted to an ``int``.
    """
    if not isinstance(grouping, dict):
        return None
    strategy = grouping.get("strategy")
    ordinal = grouping.get("ordinal")
    if not strategy or ordinal is None:
        return None
    try:
        return str(strategy), int(ordinal)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")). The json module parses Infinity
        # and out-of-range numbers such as 1e999 to inf, so metadata read
        # from disk can carry it.
        return None


def _grouping_shots(grouping: dict[str, Any] | None) -> tuple[str, ...] | None:
    """Return the grouping's ``shot_ids`` as a tuple of strings.

    Returns ``None`` when *grouping* is not a dict or ``shot_ids`` is not a
    list or tuple.
    """
    shot_ids = grouping.get("shot_ids") if isinstance(grouping, dict) else None
    if isinstance(shot_ids, (list, tuple)):
        return tuple(map(str, shot_ids))
    return None


def _parsed_signature(parsed: dict[str, Any] | None) -> tuple[str, int] | None:
    """Return ``(strategy, ordinal)`` from a parsed filename.

    Returns ``None`` when *parsed* is ``None`` or empty. The ``strategy``
    and ``ordinal`` keys are required on a non-empty mapping.
    """
    if parsed:
        strategy, ordinal = parsed["strategy"], parsed["ordinal"]
        return str(strategy), int(ordinal)
    return None


def _parsed_shots(parsed: dict[str, Any] | None) -> tuple[str, ...] | None:
    """Return the parsed filename's ``shot_ids`` as a tuple of strings.

    Returns ``None`` when *parsed* is ``None`` or empty, and an empty tuple
    when ``shot_ids`` is missing or falsy.
    """
    if not parsed:
        return None
    shot_ids = parsed.get("shot_ids")
    if not shot_ids:
        return ()
    return tuple(str(shot) for shot in shot_ids)


def _same_grouping(
    left: dict[str, Any] | None,
    right: dict[str, Any] | None,
    *,
    compare_shots: bool = True,
) -> bool:
    """Report whether two grouping dicts describe the same grouping.

    The ``(strategy, ordinal)`` signatures must be equal. With
    *compare_shots* enabled the shot lists must also agree, but only when
    both sides actually provide one.
    """
    if _grouping_signature(left) != _grouping_signature(right):
        return False
    if not compare_shots:
        return True
    shots_a = _grouping_shots(left)
    shots_b = _grouping_shots(right)
    # A side without a usable shot list cannot contradict the other.
    if shots_a is None or shots_b is None:
        return True
    return shots_a == shots_b


def _parsed_matches_grouping(
    parsed: dict[str, Any] | None,
    grouping: dict[str, Any] | None,
) -> bool:
    """Report whether a parsed filename agrees with a grouping dict.

    The signatures must be equal; the shot lists are compared only when
    both the filename and the grouping provide one.
    """
    signatures_agree = _parsed_signature(parsed) == _grouping_signature(grouping)
    if not signatures_agree:
        return False
    from_name = _parsed_shots(parsed)
    from_meta = _grouping_shots(grouping)
    if from_name is None or from_meta is None:
        return True
    return from_name == from_meta


def _filename_grouping_details(parsed: dict[str, Any]) -> dict[str, Any]:
    """Return the grouping fields of a parsed filename for finding details."""
    return {
        "strategy": parsed["strategy"],
        "ordinal": parsed["ordinal"],
        "shot_ids": parsed["shot_ids"],
    }


def _record_collapse_candidate(
    collapse_sets: dict[tuple[str, int], set[tuple[str, ...]]],
    sig: tuple[str, int] | None,
    shots: tuple[str, ...] | None,
) -> None:
    """Track a non-solo, ordinal-1 shot set under its (strategy, ordinal) key."""
    if sig and sig[1] == 1 and sig[0] != "solo" and shots:
        collapse_sets.setdefault(sig, set()).add(shots)


def find_grouping_drift(project: str, episode: int | str) -> list[DriftFinding]:
    """Return report-only grouping drift findings for a project episode.

    Three passes are made, and findings are appended in that order:

    1. Scene receipts: every recorded output path is checked for existence
       (``missing_output_path``) and, when the beat carries a grouping and
       the filename parses, for agreement between the two
       (``filename_scene_grouping_mismatch``).
    2. ``*.mp4`` files in the episode render directory: each adjacent
       ``<video>.json`` sidecar is checked against the actual video path
       (``sidecar_video_path_mismatch``), the filename
       (``sidecar_grouping_mismatch``) and the scene groupings recorded for
       the same video (``sidecar_scene_grouping_mismatch``).
    3. Collapse detection: a non-solo ``(strategy, 1)`` signature seen with
       more than one distinct shot set, across scene metadata and filenames,
       is reported as ``collapsed_ordinal_001``.

    Scene files and sidecars that cannot be loaded as JSON objects are
    skipped. Nothing on disk is modified.

    Args:
        project: Project name, resolved under the projects root.
        episode: Episode number or identifier such as ``"ep_001"``.

    Returns:
        The findings in discovery order; empty when no drift was detected.

    Raises:
        ValueError: if *episode* cannot be converted to an episode number.
    """
    episode_num = _episode_number(episode)
    episode_slug = _episode_slug(episode)
    project_root = _project_root(project)
    video_dir = _video_dir(project_root, episode_num)
    findings: list[DriftFinding] = []
    scene_groupings_by_video: dict[Path, list[dict[str, Any]]] = {}
    collapse_sets: dict[tuple[str, int], set[tuple[str, ...]]] = {}

    # Pass 1: scene receipts.
    for path in _json_files_for_episode(project_root, episode_slug):
        scene = _load_json(path)
        if scene is None:
            continue
        for output in _iter_scene_outputs(path, scene, project_root):
            if output.grouping is not None:
                # Indexed by resolved path so pass 2 can look the video up
                # no matter how the receipt spelled it.
                key = output.output_path.resolve(strict=False)
                scene_groupings_by_video.setdefault(key, []).append(output.grouping)
                _record_collapse_candidate(
                    collapse_sets,
                    _grouping_signature(output.grouping),
                    _grouping_shots(output.grouping),
                )

            if not output.output_path.exists():
                findings.append(
                    DriftFinding(
                        kind="missing_output_path",
                        path=str(output.scene_path),
                        message="scene receipt output_path points to a missing file",
                        details={
                            "scene_id": output.scene_id,
                            "beat_id": output.beat_id,
                            "output_path": output.raw_output_path,
                        },
                    )
                )
                continue

            parsed = parse_filename(output.output_path.name)
            if (
                output.grouping is not None
                and parsed
                and not _parsed_matches_grouping(parsed, output.grouping)
            ):
                findings.append(
                    DriftFinding(
                        kind="filename_scene_grouping_mismatch",
                        path=str(output.output_path),
                        message="on-disk filename grouping disagrees with scene metadata",
                        details={
                            "filename": _filename_grouping_details(parsed),
                            "scene_grouping": output.grouping,
                            "scene_path": str(output.scene_path),
                            "beat_id": output.beat_id,
                        },
                    )
                )

    # Pass 2: rendered videos and their sidecars.
    for video_path in sorted(video_dir.glob("*.mp4")):
        parsed = parse_filename(video_path.name)
        if parsed:
            _record_collapse_candidate(
                collapse_sets,
                _parsed_signature(parsed),
                _parsed_shots(parsed),
            )

        sidecar = _sidecar_path(video_path)
        if not sidecar.exists():
            continue
        sidecar_data = _load_json(sidecar)
        if sidecar_data is None:
            continue

        raw_video_path = sidecar_data.get("video_path")
        if raw_video_path and not _path_matches(
            str(raw_video_path),
            video_path,
            project_root,
            sidecar.parent,
        ):
            findings.append(
                DriftFinding(
                    kind="sidecar_video_path_mismatch",
                    path=str(sidecar),
                    message="sidecar video_path disagrees with actual video path",
                    details={
                        "sidecar_video_path": str(raw_video_path),
                        "actual_video_path": str(video_path),
                    },
                )
            )

        provenance = sidecar_data.get("provenance")
        sidecar_grouping = (
            provenance.get("grouping")
            if isinstance(provenance, dict) and isinstance(provenance.get("grouping"), dict)
            else None
        )
        if sidecar_grouping is None:
            # Without a grouping there is nothing further to compare.
            continue

        if parsed and not _parsed_matches_grouping(parsed, sidecar_grouping):
            findings.append(
                DriftFinding(
                    kind="sidecar_grouping_mismatch",
                    path=str(sidecar),
                    message="sidecar provenance.grouping disagrees with filename",
                    details={
                        "filename": _filename_grouping_details(parsed),
                        "sidecar_grouping": sidecar_grouping,
                    },
                )
            )

        scene_groupings = scene_groupings_by_video.get(video_path.resolve(strict=False), [])
        # One agreeing scene grouping is enough to clear the sidecar.
        if scene_groupings and not any(
            _same_grouping(sidecar_grouping, scene_grouping)
            for scene_grouping in scene_groupings
        ):
            findings.append(
                DriftFinding(
                    kind="sidecar_scene_grouping_mismatch",
                    path=str(sidecar),
                    message="sidecar provenance.grouping disagrees with scene grouping",
                    details={
                        "sidecar_grouping": sidecar_grouping,
                        "scene_groupings": scene_groupings,
                    },
                )
            )

    # Pass 3: collapse detection. Only ordinal-1 signatures are ever
    # recorded, so the ordinal needs no re-check here.
    for (strategy, ordinal), shot_sets in sorted(collapse_sets.items()):
        if len(shot_sets) > 1:
            findings.append(
                DriftFinding(
                    kind="collapsed_ordinal_001",
                    path=str(video_dir),
                    message="multiple distinct shot sets collapsed to ordinal 001",
                    details={
                        "strategy": strategy,
                        "ordinal": ordinal,
                        "shot_sets": [list(s) for s in sorted(shot_sets)],
                    },
                )
            )

    return findings


def _build_parser() -> argparse.ArgumentParser:
    """Create the CLI parser: ``project``, ``episode`` and ``--fail-on-findings``.

    The module docstring is used as the parser description.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("project")
    cli.add_argument("episode", help="Episode number, ep_001, or EP001")
    # Off by default, so the command exits 0 unless the caller opts in.
    cli.add_argument(
        "--fail-on-findings",
        help="Exit nonzero when drift findings are present.",
        action="store_true",
    )
    return cli


def main(argv: list[str] | None = None) -> int:
    """Run the drift report from the command line.

    Prints one line per finding, or a single "no findings" line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``1`` when ``--fail-on-findings`` was given and findings exist,
        otherwise ``0``.

    Raises:
        SystemExit: from argparse on a usage error, including an episode
            argument that cannot be converted to an episode number.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    try:
        _episode_number(args.episode)
    except ValueError:
        # Report a malformed episode as a usage error, not a traceback.
        parser.error(f"invalid episode: {args.episode!r}")
    findings = find_grouping_drift(args.project, args.episode)
    if findings:
        for finding in findings:
            print(finding.format())
    else:
        print("No grouping drift findings.")
    return 1 if args.fail_on_findings and findings else 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
