"""Canonical naming grammar—single regex, single builder, single parser.

R4 grammar (NAMING_DIGEST.md lock 2026-05-21), extended 2026-06-09:
    Single-shot:       {EP}_SH{shot_list}_take{N}.{EXT}
    Legacy coverage:   {EP}_PASS_{CCC}_SH{shot_list}_take{N}.{EXT}
    Strategy grouped:  {EP}_{CONT|COV|ONER}_{CCC}_SH{shot_list}_take{N}.{EXT}

Examples:
    EP001_SH10_take16.mp4
    EP001_SH33_take7.mp4
    EP001_PASS_007_SH16_17_18_take2.mp4
    EP001_COV_007_SH16_17_18_take2.mp4
    EP001_SH10_take16~a.mp4              (AMEND_SPEC_01 happy-accident suffix)
    EP001_SH10_take16.mp4.json           (sidecar—compound .mp4.json ext)

Dropped from filename (migrated to sidecar, per NAMING_DIGEST §"Field migration"):
    PROJECT  → folder path encodes it
    TAG      → sidecar.tag / segment_class / pass_role
    MODEL    → sidecar.model_filename_id
    PASS_CCC → omitted for single-shot dispatch

Design decisions (R2-locked, R4-tightened):
    - parse_filename returns None on no match (NOT raises).
    - build_filename checks its result against FILENAME_PATTERN and raises ValueError on mismatch—no fallback.
    - The legacy R3 verbose grammar was inlined into the Phase 2 migration
      script in Phase 11 cleanup (2026-05-21); naming.py is SHORT-grammar only.
"""

from __future__ import annotations

import json
import re
from pathlib import Path
from typing import Optional

# ---------------------------------------------------------------------------
# Pattern
# ---------------------------------------------------------------------------
# R4 SHORT grammar—7 capture groups.
#   1: episode token       (e.g. EP001)               EP\d{3}
#   2: grouping token      (PASS, CONT, COV, ONER)    OR absent
#   3: grouping ordinal    (e.g. 007 or empty)        \d{3} OR absent
#   4: shot list           (e.g. 10, 16_17_18, 5a)    underscore-joined tokens: 1-4 digits + optional lowercase letter
#   5: take number         (e.g. 16)                  \d+
#   6: take suffix         (e.g. ~a or empty)         optional happy-accident suffix
#   7: extension           (e.g. .mp4, .mp4.json)     literal dot + alnum (compound .mp4.json supported)
#
# AMEND_SPEC_01 happy-accident suffixes ("~a", "~b") attach to the take token, not
# to the shot list—build_filename appends the suffix directly after the take number.
# In a canonical name, "~a"/"~b" therefore sits between the take number and the extension:
#     EP001_SH10_take16~a.mp4   -> shot_list=10, take=16, suffix=~a
# The regex captures the optional suffix in its own group (6); parse_filename
# surfaces it via the `take_suffix` key.

# The end is anchored with "\Z" rather than "$": "$" also matches just before a
# trailing newline, which would let a name followed by a line break pass as
# canonical. re.ASCII restricts \d to 0-9 so non-ASCII digits can never match.
FILENAME_PATTERN: re.Pattern[str] = re.compile(
    r"^(EP\d{3})"                                             # 1: episode
    r"(?:_(PASS|CONT|COV|ONER)_(\d{3}))?"                     # 2-3: optional grouping token + ordinal
    r"_SH(\d{1,4}[a-z]?(?:_\d{1,4}[a-z]?)*)"                  # 4: shot list
    r"_take(\d+)(~[a-z])?"                                    # 5-6: take + optional ~a/~b suffix
    r"(\.[a-zA-Z0-9]+(?:\.[a-zA-Z0-9]+)?)\Z",                 # 7: extension
    re.ASCII,
)

# Legacy R3 verbose grammar deleted Phase 11 (2026-05-21)—it now lives inline
# in recoil/pipeline/tools/migrate_pass_names_r4.py, which is its only
# consumer. naming.py is SHORT-grammar only.

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Finds the SH<digits>[letter] token inside a shot ID; matching is case-insensitive.
_SHOT_TOKEN_RE = re.compile(r"SH(\d{1,4}[a-z]?)", flags=re.IGNORECASE)

# Strategy name -> token emitted in the filename.
STRATEGY_TOKENS = {
    "continuity": "CONT",
    "coverage": "COV",
    "oner": "ONER",
}

# Reverse lookup: filename token -> strategy name.
TOKEN_STRATEGIES = dict(zip(STRATEGY_TOKENS.values(), STRATEGY_TOKENS.keys()))

# Every strategy name accepted by build_filename; "solo" has no filename token.
SUPPORTED_STRATEGIES = frozenset(STRATEGY_TOKENS) | {"solo"}


def _shot_list_token(shot_ids: list[str]) -> str:
    """Convert a list of shot IDs into the canonical underscore-joined token.

    The first ``SH<digits>[letter]`` token found in each ID is used, and the
    token is lowercased.

    Examples:
        ["EP001_SH30", "EP001_SH31"]    -> "30_31"
        ["EP001_SH1a", "EP001_SH2"]     -> "1a_2"
        ["EP001_SH5"]                   -> "5"

    Raises:
        ValueError: if ``shot_ids`` is empty, if any shot_id does not contain
            a parseable SH<n> token, or if the token runs on into further
            letters/digits (e.g. "SH12345" or "SH10ab").
    """
    if not shot_ids:
        raise ValueError("shot_ids cannot be empty")
    tokens: list[str] = []
    for sid in shot_ids:
        m = _SHOT_TOKEN_RE.search(sid)
        if not m:
            raise ValueError(
                f"shot_id {sid!r} has no SH<num> token; cannot build filename"
            )
        # The pattern stops after 4 digits + 1 letter. If more alphanumerics
        # follow, the match is a truncated prefix of a longer token; using it
        # would silently name a different shot, so refuse instead.
        if sid[m.end():m.end() + 1].isalnum():
            raise ValueError(
                f"shot_id {sid!r} has an over-long SH<num> token; cannot build filename"
            )
        tokens.append(m.group(1).lower())
    return "_".join(tokens)


# Process-wide cache for model_profiles.json; None means "not loaded yet".
_MODEL_PROFILES_CACHE: Optional[dict] = None


def _load_model_profiles() -> dict:
    """Lazy-load model_profiles.json, caching the outcome in a module global.

    The file is looked up at ``<parent of this module's directory>/config/
    model_profiles.json`` and decoded as UTF-8.

    Returns:
        The parsed JSON object, or {} if the file is missing, unreadable, not
        valid UTF-8/JSON, or its top-level value is not a JSON object. The
        empty fallback is cached too, so the file is attempted only once.
    """
    global _MODEL_PROFILES_CACHE
    if _MODEL_PROFILES_CACHE is not None:
        return _MODEL_PROFILES_CACHE
    loaded: object = None
    try:
        path = Path(__file__).resolve().parent.parent / "config" / "model_profiles.json"
        loaded = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. This is a deliberate
        # best-effort load: callers fall back to default normalization.
        pass
    # Callers index the result by model id, so only a JSON object is usable.
    _MODEL_PROFILES_CACHE = loaded if isinstance(loaded, dict) else {}
    return _MODEL_PROFILES_CACHE


def _model_filename_id(model: str) -> str:
    """Return a filename-safe identifier for a model id.

    If model_profiles.json has a dict entry for ``model`` with a truthy
    ``filename_id``, that value is returned (converted with ``str``).
    Otherwise the id is lowercased, each ".", " ", "/" and "_" becomes "-",
    runs of hyphens are collapsed to one, and leading/trailing hyphens are
    stripped.

    Examples:
        "seeddance-2.0"      -> "seeddance-2-0"
        "kling-v3-i2v"       -> "kling-v3-i2v"
        "gemini-2.5-flash-image" -> "gemini-2-5-flash-image"

    Raises:
        ValueError: if the normalized id is empty.
    """
    profiles = _load_model_profiles()
    entry = profiles[model] if model in profiles else None
    if isinstance(entry, dict):
        override = entry.get("filename_id")
        if override:
            return str(override)

    # One pass over the string maps every separator character to a hyphen.
    hyphenated = model.lower().translate(str.maketrans(". /_", "----"))
    # Collapse repeated hyphens.
    slug = re.sub(r"-+", "-", hyphenated).strip("-")
    if slug:
        return slug
    raise ValueError(f"model {model!r} normalizes to empty filename_id")


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def build_filename(
    *,
    episode: int,
    shot_ids: list[str],
    take: int,
    strategy: str | None = None,
    ordinal: int | None = None,
    pass_counter: int | None = None,
    take_suffix: str | None = None,
    ext: str = ".mp4",
) -> str:
    """Build a canonical SHORT-grammar output filename.

    Args:
        episode: Episode number (e.g. 1 -> "EP001").
        shot_ids: List of shot IDs (e.g. ["EP001_SH30", "EP001_SH31"]).
        take: Take number (>= 1).
        strategy: Grouping strategy for new grouped filenames. Supported values
            are "continuity", "coverage", "oner", and "solo".
        ordinal: Strategy grouping ordinal (1-999). Required for continuity,
            coverage, and oner. Solo accepts only 0 or None and emits no
            strategy token.
        pass_counter: Legacy pass counter (0-999), emitted as _PASS_NNN_.
            Mutually exclusive with strategy/ordinal. If None and no grouped
            strategy is given, the name is SINGLE-SHOT with no grouping token.
        take_suffix: Optional "~a" / "~b" / ... happy-accident suffix per
            AMEND_SPEC_01:51. Must start with "~" and be lowercase a-z if set.
        ext: File extension, with or without leading dot. Defaults to ".mp4".
             Supports compound extensions like ".mp4.json" for sidecars.

    Returns:
        The canonical filename string.

    Raises:
        ValueError: If any input is invalid OR if the assembled name fails to
            round-trip through FILENAME_PATTERN.
    """
    if not isinstance(episode, int) or episode < 0:
        raise ValueError(f"episode must be a non-negative int, got {episode!r}")
    if not isinstance(take, int) or take < 1:
        raise ValueError(f"take must be >= 1, got {take!r}")
    if pass_counter is not None and (strategy is not None or ordinal is not None):
        raise ValueError("pass_counter cannot be combined with strategy or ordinal")
    if strategy is None and ordinal is not None:
        raise ValueError("ordinal requires strategy")
    if strategy is not None:
        if strategy not in SUPPORTED_STRATEGIES:
            raise ValueError(
                f"strategy must be one of {sorted(SUPPORTED_STRATEGIES)}, got {strategy!r}"
            )
        if strategy == "solo":
            if ordinal is not None and ordinal != 0:
                raise ValueError(f"solo ordinal must be 0 or None, got {ordinal!r}")
        else:
            if ordinal is None:
                raise ValueError(f"ordinal is required for strategy {strategy!r}")
            if not isinstance(ordinal, int) or not (1 <= ordinal <= 999):
                raise ValueError(
                    f"ordinal must be 1-999 for strategy {strategy!r}, got {ordinal!r}"
                )
    if pass_counter is not None:
        if not isinstance(pass_counter, int) or not (0 <= pass_counter <= 999):
            raise ValueError(
                f"pass_counter must be 0-999 or None, got {pass_counter!r}"
            )
    if take_suffix is not None:
        if not (
            isinstance(take_suffix, str)
            and len(take_suffix) == 2
            and take_suffix.startswith("~")
            and take_suffix[1].isalpha()
            and take_suffix[1].islower()
        ):
            raise ValueError(
                f"take_suffix must be ~<lowercase letter> or None, got {take_suffix!r}"
            )
    # Reject a non-string ext here so the caller gets the documented ValueError
    # instead of an AttributeError from .startswith below.
    if not isinstance(ext, str):
        raise ValueError(f"ext must be a string, got {ext!r}")

    episode_token = f"EP{episode:03d}"
    shot_token = _shot_list_token(shot_ids)
    if strategy is not None and strategy != "solo":
        pass_token = f"_{STRATEGY_TOKENS[strategy]}_{ordinal:03d}"
    else:
        pass_token = f"_PASS_{pass_counter:03d}" if pass_counter is not None else ""
    suffix_token = take_suffix or ""

    if not ext.startswith("."):
        ext = "." + ext

    name = f"{episode_token}{pass_token}_SH{shot_token}_take{take}{suffix_token}{ext}"

    # No-fallback sanity check—refuse to return non-canonical names.
    # fullmatch (not match) is required: a "$"-anchored pattern also matches
    # just before a trailing newline, so .match would accept ext=".mp4\n".
    if FILENAME_PATTERN.fullmatch(name) is None:
        raise ValueError(
            f"build_filename produced non-canonical name {name!r}; "
            f"inputs: episode={episode!r} strategy={strategy!r} "
            f"ordinal={ordinal!r} pass_counter={pass_counter!r} shot_ids={shot_ids!r} "
            f"take={take!r} take_suffix={take_suffix!r} ext={ext!r}"
        )
    return name


def parse_filename(filename: str) -> Optional[dict]:
    """Parse a canonical SHORT-grammar filename into its components.

    The whole string must be canonical; anything trailing the extension
    (including a newline) makes the name non-canonical.

    Returns None on no match—callers can do `if parsed := parse_filename(name): ...`.

    Returns dict keys on match:
        episode_token  str    e.g. "EP001"
        episode        int    e.g. 1
        pass_counter   int|None  e.g. 7 (legacy PASS) or None (everything else)
        strategy       str       "continuity", "coverage", "oner", or "solo";
                                 legacy PASS names are reported as "coverage"
        ordinal        int       grouping ordinal, or 0 for solo
        grouping_token str|None  raw token from filename: PASS, CONT, COV, ONER
        legacy_grouping bool     True for legacy _PASS_ filenames
        shot_list      str    e.g. "10" or "16_17_18"
        shot_tokens    list[str]   e.g. ["10"] or ["16","17","18"]
        shot_ids       list[str]   e.g. ["EP001_SH10"]
        take           int    e.g. 16
        take_suffix    str|None  e.g. "~a" or None
        ext            str    e.g. ".mp4" or ".mp4.json"
    """
    # fullmatch (not match): with a "$"-anchored pattern, .match would also
    # accept a name followed by a trailing newline and silently drop it.
    m = FILENAME_PATTERN.fullmatch(filename)
    if m is None:
        return None

    episode_token, grouping_token, ordinal_str, shot_list, take_str, take_suffix, ext = (
        m.groups()
    )
    shot_tokens = shot_list.split("_")
    legacy_grouping = grouping_token == "PASS"
    if grouping_token is None:
        # No grouping token: single-shot name, reported as solo with ordinal 0.
        strategy = "solo"
        ordinal = 0
    elif legacy_grouping:
        # Legacy _PASS_NNN_ names are surfaced as coverage groups.
        strategy = "coverage"
        ordinal = int(ordinal_str)
    else:
        strategy = TOKEN_STRATEGIES[grouping_token]
        ordinal = int(ordinal_str)

    return {
        "episode_token": episode_token,
        "episode": int(episode_token[2:]),
        "pass_counter": ordinal if legacy_grouping else None,
        "strategy": strategy,
        "ordinal": ordinal,
        "grouping_token": grouping_token,
        "legacy_grouping": legacy_grouping,
        "shot_list": shot_list,
        "shot_tokens": shot_tokens,
        "shot_ids": [f"{episode_token}_SH{tok}" for tok in shot_tokens],
        "take": int(take_str),
        "take_suffix": take_suffix,
        "ext": ext,
    }


def parse_to_build_args(parsed: dict) -> dict:
    """Convert a parse_filename(...) result into build_filename(...) kwargs.

    Used by round-trip tests and by audit_assertions.assert_canonical_filename.
    The SHORT grammar drops project/tag/model—those are now sidecar-only.

    Legacy names (``legacy_grouping`` truthy, or a dict with no ``strategy``
    key but a non-None ``pass_counter``) map to ``pass_counter``. Everything
    else maps to ``strategy`` (default "solo"), plus ``ordinal`` when the
    strategy is not solo.
    """
    kwargs = {key: parsed[key] for key in ("episode", "shot_ids", "take")}
    kwargs["take_suffix"] = parsed.get("take_suffix")
    kwargs["ext"] = parsed["ext"]

    counter_only = "strategy" not in parsed and parsed.get("pass_counter") is not None
    if parsed.get("legacy_grouping") or counter_only:
        kwargs["pass_counter"] = parsed["pass_counter"]
        return kwargs

    chosen = parsed.get("strategy", "solo")
    kwargs["strategy"] = chosen
    if chosen != "solo":
        kwargs["ordinal"] = parsed["ordinal"]
    return kwargs


# ---------------------------------------------------------------------------
# Backward-compat aliases removed Phase 11 (2026-05-21). All callers migrated
# to build_filename / parse_filename / FILENAME_PATTERN. The shim module
# recoil/core/video_naming.py was deleted in the same phase.
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# Module-load round-trip self-test (cheap; runs once at import)
# ---------------------------------------------------------------------------
def _self_test() -> None:
    """Round-trip every fixture filename. Fail-fast on import if grammar is broken.

    Each fixture is built, parsed, converted back to build kwargs, and rebuilt;
    the rebuilt name must equal the first one. Failures are raised explicitly
    rather than via ``assert`` so the check still runs under ``python -O``,
    which strips assert statements.

    Raises:
        AssertionError: if a built name does not parse or does not rebuild
            to the identical string.
    """
    fixtures: list[dict] = [
        # Single-shot, plain
        {
            "episode": 1,
            "shot_ids": ["EP001_SH10"],
            "take": 16,
            "ext": ".mp4",
        },
        # Single-shot with happy-accident suffix
        {
            "episode": 1,
            "shot_ids": ["EP001_SH10"],
            "take": 16,
            "take_suffix": "~a",
            "ext": ".mp4",
        },
        # Multi-shot PASS segment
        {
            "episode": 1,
            "pass_counter": 7,
            "shot_ids": ["EP001_SH16", "EP001_SH17", "EP001_SH18"],
            "take": 2,
            "ext": ".mp4",
        },
        # New strategy-token grouped segment
        {
            "episode": 1,
            "strategy": "coverage",
            "ordinal": 7,
            "shot_ids": ["EP001_SH16", "EP001_SH17", "EP001_SH18"],
            "take": 2,
            "ext": ".mp4",
        },
        # Sidecar (compound .mp4.json extension)
        {
            "episode": 1,
            "shot_ids": ["EP001_SH33"],
            "take": 7,
            "ext": ".mp4.json",
        },
    ]
    for args in fixtures:
        name = build_filename(**args)
        parsed = parse_filename(name)
        if parsed is None:
            raise AssertionError(f"parse_filename returned None for {name!r}")
        rebuilt = build_filename(**parse_to_build_args(parsed))
        if rebuilt != name:
            raise AssertionError(
                f"Round-trip mismatch: built {name!r} but rebuilt {rebuilt!r}"
            )


_self_test()


def next_take_number(
    video_dir: Path,
    *,
    episode: int,
    strategy: str,
    ordinal: int,
    shot_ids: list[str],
) -> int:
    """Return the next take number for a strategy-grouped pass.

    Scans ``video_dir`` for existing ``.mp4`` outputs that share the canonical
    ``build_filename`` stem for this (episode, strategy, ordinal, shot_ids),
    differing only in their ``_take<N>`` part, and returns ``max(N) + 1``
    (or 1 when none exist). Happy-accident variants (``_take<N>~a.mp4``) count
    toward ``N`` as well, so a suffixed take is never under-counted when its
    plain ``_take<N>.mp4`` sibling is absent.

    The scan stem is derived from ``build_filename`` itself, so it always
    tracks the live filename grammar — preventing the REC-102 regression where
    a stale ``*{pass_id}_take*`` glob no longer matched strategy-token names
    and re-generation overwrote ``take1``.

    Raises:
        ValueError: propagated from ``build_filename`` when the grouping
            arguments are invalid.
    """
    scan_name = build_filename(
        episode=episode,
        strategy=strategy,
        ordinal=ordinal,
        shot_ids=list(shot_ids),
        take=1,
    )
    # Cut the probe name back to "..._take" so the glob covers every take.
    scan_prefix = re.sub(r"_take\d+(?:\.[a-zA-Z0-9]+)+$", "_take", scan_name)
    # Optional "~<letter>" lets suffixed takes contribute their take number.
    take_re = re.compile(r"_take(\d+)(?:~[a-z])?(?:\.[a-zA-Z0-9]+)+$")
    max_take = 0
    # Only the maximum matters, so the directory listing need not be sorted.
    for vp in video_dir.glob(f"{scan_prefix}*.mp4"):
        tm = take_re.search(vp.name)
        if tm:
            max_take = max(max_take, int(tm.group(1)))
    return max_take + 1


if __name__ == "__main__":
    # Manual demo: build a coverage-grouped name, parse it, then rebuild it
    # from the parsed fields and report whether the two names agree.
    demo_args = {
        "episode": 1,
        "strategy": "coverage",
        "ordinal": 7,
        "shot_ids": ["EP001_SH16", "EP001_SH17", "EP001_SH18"],
        "take": 2,
        "ext": ".mp4",
    }
    built = build_filename(**demo_args)
    print(f"Built:  {built}")  # EP001_COV_007_SH16_17_18_take2.mp4
    fields = parse_filename(built)
    print(f"Parsed: {fields}")
    round_tripped = build_filename(**parse_to_build_args(fields))
    print(f"Round-trip OK: {round_tripped == built}")
