"""Structural vs content fingerprints for the derivation-manifest freshness model.

structural_sha hashes ONLY the plan-shot fields that downstream grouping/passes/boards
actually break or partition on (verified against the live consumers, 2026-06-13):
scene_clusterer breaks on shot_type (drastic-angle-jump), location_id; coverage_planner
keys runs on (location_id, is_env_only) and chunks by target_editorial_duration_s. A
creative re-roll that keeps the same shots/boundaries/locations/grouping-inputs yields an
IDENTICAL structural_sha (so approved boards/passes survive) but a new content_sha.
"""
from __future__ import annotations
import hashlib
import json
from typing import Any

# Version of the DERIVED grouping parameters (NOT plan-shot fields). Bump it whenever
# scene_clusterer._DRASTIC_JUMP, the max_batch_size default, or the coverage chunk caps
# change, so that a heuristic change invalidates cached grouping/passes even when the
# plan shots themselves are identical. plan_structural_sha embeds this value in its
# hashed payload under the "grouping_param_version" key.
GROUPING_PARAM_VERSION = 1


def _shot_structural_tuple(shot: dict[str, Any]) -> list[Any]:
    """Extract the structure-bearing fields of ONE plan shot (ShotRecord dict).

    Field names were verified against render_schema.ShotRecord on origin/main afef91e4.

    Args:
        shot: A plan-shot dict. The nested "routing_data", "prompt_data",
            "asset_data" and "spatial_data" sections may be missing or None; each
            is then treated as an empty dict.

    Returns:
        An ordered list: shot_id, scene_index, location_id, shot_type,
        is_env_only (coerced to bool), target_editorial_duration_s, the sorted
        truthy char_ids, camera_side, screen_direction, axis_segment_id and
        cut_relation. A trailing ("sublocation", value) pair is appended only
        when a sublocation value is found.
    """
    routing_data, prompt_data, asset_data, spatial_data = (
        shot.get(section) or {}
        for section in ("routing_data", "prompt_data", "asset_data", "spatial_data")
    )

    # Normalized character membership: coverage_planner picks focus/element refs from
    # characters (coverage_planner.py:492,696), so a JADE→WREN swap must invalidate.
    char_ids = []
    for member in asset_data.get("characters") or []:
        char_id = member.get("char_id")
        if char_id:
            char_ids.append(char_id)
    char_ids.sort()

    fields = [
        shot.get("shot_id"),
        shot.get("scene_index"),
        asset_data.get("location_id"),
        prompt_data.get("shot_type"),
        bool(routing_data.get("is_env_only")),
        routing_data.get("target_editorial_duration_s"),
        char_ids,  # character membership (C2)
        # A/B side — drives pass naming/labeling (C2).
        spatial_data.get("camera_side"),
        # REC-180 derived 180-line fields. They now drive prompt/blocking and the
        # critic, so a re-derive that changes them (via axis_plans) MUST invalidate
        # locked boards/passes even when camera_side is unchanged.
        spatial_data.get("screen_direction"),
        spatial_data.get("axis_segment_id"),
        spatial_data.get("cut_relation"),
    ]

    # Forward-compat: D6 will add a per-shot sublocation_id. It is hashed ONLY when
    # present, so the fingerprint stays stable today (field absent) and tightens
    # automatically later. A falsy sublocation_id defers to spatial_data["sublocation"].
    sublocation = shot.get("sublocation_id")
    if not sublocation:
        sublocation = spatial_data.get("sublocation")
    if sublocation is None:
        return fields
    fields.append(("sublocation", sublocation))
    return fields


def plan_structural_sha(plan: dict[str, Any]) -> str:
    """Compute the deterministic structural fingerprint of an EpisodePlan dict.

    Args:
        plan: Episode plan dict. Only "episode_id" and "shots" are read; a missing
            or None "shots" is treated as an empty list.

    Returns:
        "sha256:" followed by the hex digest of the canonical JSON payload
        (sorted keys, compact separators, non-JSON values stringified via str).
    """
    shot_list = plan.get("shots") or []
    canonical = json.dumps(
        {
            "episode_id": plan.get("episode_id"),
            "shot_count": len(shot_list),
            # The list stays in plan order, so it captures both order and membership.
            "shots": list(map(_shot_structural_tuple, shot_list)),
            "grouping_param_version": GROUPING_PARAM_VERSION,
        },
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"sha256:{digest}"


def content_sha(artifact: dict[str, Any]) -> str:
    """Fingerprint an entire artifact dict (everything, including creative prose).

    Args:
        artifact: The dict to hash. Values json cannot encode natively are
            stringified with str.

    Returns:
        "sha256:" followed by the hex digest of the canonical JSON encoding
        (sorted keys, compact separators, UTF-8 bytes).
    """
    serialized = json.dumps(
        artifact,
        default=str,
        separators=(",", ":"),
        sort_keys=True,
    )
    hasher = hashlib.sha256()
    hasher.update(serialized.encode("utf-8"))
    return "sha256:" + hasher.hexdigest()


def board_content_freshness_sha(shot_script_spans: dict[str, str | None]) -> str:
    """Return the content-freshness signal for a board's covered script spans.

    This is deliberately distinct from the board v2 cache identity fingerprint,
    which remains structure-only. The payload carries a version so that future
    membership or span-semantics changes can invalidate old records cleanly.

    Args:
        shot_script_spans: Mapping of shot id to its script span (or None).

    Returns:
        The bare sha256 hex digest of the JSON payload. Unlike content_sha and
        plan_structural_sha there is no "sha256:" prefix, and the payload is
        serialized with json's default separators.
    """
    # Sort the (shot id, span) pairs so dict insertion order cannot affect the hash.
    ordered_spans = list(shot_script_spans.items())
    ordered_spans.sort()
    envelope = {
        "v": 1,
        "kind": "board_content_freshness",
        "spans": ordered_spans,
    }
    encoded = json.dumps(envelope, default=str).encode()
    return hashlib.sha256(encoded).hexdigest()


# Placeholder id that must never contribute to a shot-set identity.
_WILDCARD_SENTINEL = "wildcard"


def shotset_hash(shot_ids: "list[str]") -> str:
    """Order-independent identity for the SET of plan shots a derived unit covers (D2/L1).

    Ids are de-duplicated and sorted, then hashed with the SAME canonicalization as
    plan_structural_sha. This helper is the SINGLE place where sentinel/empty
    filtering happens (spec-review MINOR): falsey ids AND the "wildcard" sentinel
    are dropped, so callers cannot accidentally fold a sentinel into the identity.
    NOT keyed on anything yet — pure identity metadata.

    Args:
        shot_ids: Shot ids in any order, possibly with duplicates, empty values
            or the wildcard sentinel.

    Returns:
        "sha256:" followed by the hex digest of the canonical JSON payload.
    """
    unique_ids = set()
    for shot_id in shot_ids:
        if not shot_id or shot_id == _WILDCARD_SENTINEL:
            continue
        unique_ids.add(shot_id)

    canonical = json.dumps(
        {"shot_ids": sorted(unique_ids)},
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
    digest = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return f"sha256:{digest}"


# Public API of this module. Underscore-prefixed names (_shot_structural_tuple,
# _WILDCARD_SENTINEL) are not exported.
__all__ = [
    "plan_structural_sha",
    "content_sha",
    "board_content_freshness_sha",
    "shotset_hash",
    "GROUPING_PARAM_VERSION",
]
