"""Puzzle Box episode parser.

Parses the 2-layer script format (Narrative + Pipeline Direction)
and extracts shots with beat assignments and grammar tags.

The Puzzle Box script format has two separated sections:
1. NARRATIVE SCRIPT — audience-facing (VO, fragment, endings)
2. PIPELINE DIRECTION — engine-facing (shot-by-shot specs)

This parser reads the Pipeline Direction section to extract shots,
maps them to beats, and attaches grammar metadata.
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class PuzzleBoxShot:
    """One shot line taken from the Pipeline Direction layer of an episode.

    Instances are plain value holders; ``to_dict`` exposes the same data as
    a dictionary for serialization.
    """

    # Number written in the script's "Shot <n>" line.
    shot_number: int
    # Beat the shot belongs to: ENTRY_IMAGE, VOICE, LINGER, BREAK, AFTERMATH.
    beat: str
    # Length of the shot in seconds.
    duration_seconds: float
    # Framing abbreviation such as WS, MS, CU, ECU.
    shot_type: str
    # Free-text description of what the shot shows.
    description: str
    # Visual grammar tag (e.g. SADIE or DUSTY); None when the shot has none.
    grammar_tag: Optional[str] = None
    # Character names associated with the shot.
    characters: list = field(default_factory=list)
    # Normalized camera movement label; "static" unless stated otherwise.
    camera_movement: str = "static"

    def to_dict(self) -> dict:
        """Return the shot's fields as a dict, in declaration order.

        Values are passed through as-is, so ``characters`` in the result is
        the same list object held by the instance.
        """
        exported = (
            "shot_number",
            "beat",
            "duration_seconds",
            "shot_type",
            "description",
            "grammar_tag",
            "characters",
            "camera_movement",
        )
        return {key: getattr(self, key) for key in exported}


@dataclass
class PuzzleBoxEpisode:
    """Parsed episode structure.

    Holds everything extracted from one episode script: header fields,
    metadata, the shot list, the raw narrative layer and audio direction.
    """

    # Number taken from the episode header.
    episode_number: int
    # Title taken from the episode header.
    title: str
    # Key/value pairs read from the script's metadata list.
    metadata: dict
    # PuzzleBoxShot instances (any object exposing ``to_dict()`` works).
    shots: list
    # Raw text of the narrative layer; empty string when absent.
    narrative_text: str
    # Audio direction key/value pairs.
    audio_direction: dict
    # True when the episode is flagged as a fracture episode.
    is_fracture: bool = False

    def to_dict(self) -> dict:
        """Return the episode as a plain dict suitable for serialization.

        Each shot is converted through its own ``to_dict()``; ``shot_count``
        is derived from the length of ``shots``. ``narrative_text`` is
        included so the narrative layer is not lost when only the dict form
        is passed on; it is appended last so the order of the other keys is
        unchanged.
        """
        return {
            "episode_number": self.episode_number,
            "title": self.title,
            "metadata": self.metadata,
            "shots": [s.to_dict() for s in self.shots],
            "is_fracture": self.is_fracture,
            "shot_count": len(self.shots),
            "audio_direction": self.audio_direction,
            "narrative_text": self.narrative_text,
        }


# ── Pipeline Direction beat+shots pattern ─────────────────────────
# Beat header line, e.g. "### ENTRY IMAGE (8s) — 3 shots".
# Groups: (1) beat name, (2) beat duration in whole seconds, (3) shot count.
# The separator before the shot count may be an em dash, en dash or hyphen;
# matching is case-insensitive and anchored to the whole line.
_PIPELINE_BEAT = re.compile(
    r"^###\s+(ENTRY IMAGE|VOICE|LINGER|BREAK|AFTERMATH)"
    r"\s*\((\d+)s\)"  # Duration
    r"\s*[—–-]\s*(\d+)\s*shots?"  # Shot count
    r"\s*$",
    re.IGNORECASE,
)

# Shot bullet line, e.g. "- Shot 2 (3s): CU. Sadie at the window, slow push-in".
# Groups: (1) shot number, (2) duration in whole seconds, (3) shot-type
# abbreviation, (4) the rest of the line (description) after any dots or
# whitespace that follow the shot type. Case-insensitive.
_SHOT_LINE = re.compile(
    r"^-\s+Shot\s+(\d+)\s+\((\d+)s\):\s+"
    r"(E?CU|BCU|MCU|MS|MFS|MLS|MWS|LS|FS|WS|EWS|VLS|WIDE|INSERT)\b"
    r"[.\s]*(.*)",
    re.IGNORECASE,
)

# Inline grammar tag, e.g. "[GRAMMAR: SADIE]" or "GRAMMAR: SADIE" (the square
# brackets are optional). Group 1 is the tag word. Case-insensitive.
_GRAMMAR_TAG = re.compile(
    r"\[?GRAMMAR:\s*(\w+)\]?",
    re.IGNORECASE,
)

# Camera movement keywords
# Maps a lowercase phrase that may appear in a shot description to the
# normalized movement label. Insertion order is significant:
# _detect_movement walks the dict in order and returns on the first keyword
# it finds, so earlier entries take precedence over later ones.
_MOVEMENT_KEYWORDS = {
    "static": "static",
    "drift": "drift",
    "push-in": "push_in",
    "push in": "push_in",
    "pull-back": "pull_back",
    "pull back": "pull_back",
    "tracking": "tracking",
    "pan": "pan",
    "tilt": "tilt",
    "handheld": "handheld",
    "hold": "static",
    "slow": "drift",
}

# Character name patterns — extended per project at import time
# Stored uppercase. This is a set, so its iteration order is not guaranteed
# to be the same from one interpreter run to the next.
_KNOWN_CHARACTERS = {"SADIE", "DUSTY"}


def _detect_movement(description: str) -> str:
    """Return the normalized camera movement named in a shot description.

    Keywords from ``_MOVEMENT_KEYWORDS`` are tried in the dict's insertion
    order and the first one found decides the result, so earlier entries
    take precedence. Matching is case-insensitive.

    A keyword only counts when it begins at the start of a word. This keeps
    inflected forms working ("panning", "holds", "slowly") while rejecting
    keywords buried inside unrelated words, e.g. "ecstatic" (static),
    "company" (pan), "threshold" (hold) or "stilted" (tilt).

    Args:
        description: Free-text shot description.

    Returns:
        The mapped movement label, or "static" when no keyword is present.
    """
    desc_lower = description.lower()
    for keyword, movement in _MOVEMENT_KEYWORDS.items():
        # (?<!\w) = not preceded by a word character, i.e. a word start.
        if re.search(r"(?<!\w)" + re.escape(keyword), desc_lower):
            return movement
    return "static"


def _extract_characters(description: str) -> list[str]:
    """Return the known character names mentioned in a shot description.

    Each name in ``_KNOWN_CHARACTERS`` is looked up case-insensitively as a
    whole word, so a short name cannot match inside a longer word while
    forms such as "Sadie's" or "Sadie," still match.

    Names are checked in sorted order. ``_KNOWN_CHARACTERS`` is a set, whose
    iteration order can differ between interpreter runs; sorting makes the
    returned list reproducible.

    Args:
        description: Free-text shot description.

    Returns:
        Matching names exactly as stored in ``_KNOWN_CHARACTERS``, sorted
        alphabetically. Empty list when none are mentioned.
    """
    desc_lower = description.lower()  # lower-case once, not once per name
    return [
        name
        for name in sorted(_KNOWN_CHARACTERS)
        if re.search(
            r"(?<!\w)" + re.escape(name.lower()) + r"(?!\w)", desc_lower
        )
    ]


# ── Patterns used only by parse_episode and its helpers ───────────
# "# EP<number> — <title>"; the separator may be em dash, en dash or hyphen.
_EPISODE_HEADER = re.compile(r"^#\s+EP(\d+)\s*[—–-]\s*(.+)$")

# Metadata bullet. Group 1 or group 2 holds the key, group 3 the value.
_METADATA_ITEM = re.compile(
    r"^-\s+"
    r"(?:\*\*(\w[\w\s/]+):\*\*"  # "**Key:** value" (colon inside the bold)
    r"|\*?\*?(\w[\w\s/]+)\*?\*?:)"  # "Key: v", "*Key*: v", "**Key**: v"
    r"\s*(.+)$"
)

_PIPELINE_HEADING = re.compile(r"^##\s+PIPELINE DIRECTION", re.IGNORECASE)
_NARRATIVE_HEADING = re.compile(r"^##\s+NARRATIVE SCRIPT", re.IGNORECASE)
_AUDIO_HEADING = re.compile(r"^###\s+Audio Direction", re.IGNORECASE)

# (line prefix, audio_direction key) pairs; prefixes are case-sensitive.
_AUDIO_FIELDS = (
    ("- Ambient bed:", "ambient_bed"),
    ("- Spot SFX:", "spot_sfx"),
    ("- VO delivery:", "vo_delivery"),
)


def _read_episode_header(lines: list[str]) -> tuple[int, str]:
    """Return (episode number, title) from the header in the first five lines."""
    for line in lines[:5]:
        header = _EPISODE_HEADER.match(line)
        if header:
            return int(header.group(1)), header.group(2).strip()
    return 0, ""


def _read_metadata(lines: list[str]) -> dict[str, str]:
    """Collect "- Key: value" bullets listed under the "## Metadata" heading."""
    metadata: dict[str, str] = {}
    in_metadata = False
    for line in lines:
        if line.strip() == "## Metadata":
            in_metadata = True
            continue
        if not in_metadata:
            continue
        # The list ends at the next heading or horizontal rule.
        if line.startswith("##") or line.strip() == "---":
            break
        item = _METADATA_ITEM.match(line)
        if item:
            raw_key = item.group(1) or item.group(2)
            key = raw_key.strip().lower().replace(" ", "_").replace("/", "_")
            metadata[key] = item.group(3).strip()
    return metadata


def _find_heading(lines: list[str], pattern: re.Pattern) -> Optional[int]:
    """Return the index of the first line matching *pattern*, or None."""
    for index, line in enumerate(lines):
        if pattern.match(line):
            return index
    return None


def _build_shot(shot_match: re.Match, beat: str) -> PuzzleBoxShot:
    """Turn a ``_SHOT_LINE`` match into a PuzzleBoxShot assigned to *beat*."""
    description = shot_match.group(4).strip()

    # A grammar tag is metadata, not prose: record it and remove it from
    # the description before characters/movement are detected.
    grammar_tag = None
    tag_match = _GRAMMAR_TAG.search(description)
    if tag_match:
        grammar_tag = tag_match.group(1).upper()
        description = _GRAMMAR_TAG.sub("", description).strip()

    return PuzzleBoxShot(
        shot_number=int(shot_match.group(1)),
        beat=beat,
        duration_seconds=float(int(shot_match.group(2))),
        shot_type=shot_match.group(3).upper(),
        description=description,
        grammar_tag=grammar_tag,
        characters=_extract_characters(description),
        camera_movement=_detect_movement(description),
    )


def _read_pipeline(lines: list[str]) -> tuple[list, dict]:
    """Return (shots, audio_direction) parsed from the pipeline section body."""
    shots = []
    audio_direction = {}
    current_beat = None

    for raw_line in lines:
        stripped = raw_line.strip()

        # The audio block is not a beat: shot lines after it are ignored
        # until the next beat header.
        if _AUDIO_HEADING.match(stripped):
            current_beat = None
            continue

        audio_key = next(
            (key for prefix, key in _AUDIO_FIELDS if stripped.startswith(prefix)),
            None,
        )
        if audio_key is not None:
            audio_direction[audio_key] = stripped.split(":", 1)[1].strip()
            continue

        beat_match = _PIPELINE_BEAT.match(stripped)
        if beat_match:
            current_beat = beat_match.group(1).upper().replace(" ", "_")
            continue

        # Shot lines seen before any beat header are skipped.
        shot_match = _SHOT_LINE.match(stripped)
        if shot_match and current_beat is not None:
            shots.append(_build_shot(shot_match, current_beat))

    return shots, audio_direction


def parse_episode(script_text: str) -> dict:
    """Parse a Puzzle Box episode script into structured shot data.

    The script is processed line by line:

    * the "# EP<n> — <title>" header is looked for in the first five lines
      (episode number 0 and an empty title when absent);
    * bullets under "## Metadata" become the metadata dict, with keys
      lower-cased and spaces or slashes replaced by underscores. Both
      "**Key**: value" and "**Key:** value" spellings are accepted;
    * everything after the "## PIPELINE DIRECTION" heading is scanned for
      beat headers, shot lines and audio direction bullets;
    * the narrative text runs from the "## NARRATIVE SCRIPT" heading up to
      the pipeline heading when that follows it, otherwise to the end of
      the script.

    An episode is flagged as a fracture episode when its title contains
    "fracture" or its metadata "rhythm" value is "kinetic" (both checks are
    case-insensitive).

    Args:
        script_text: Full episode markdown text (both layers).

    Returns:
        The dict produced by ``PuzzleBoxEpisode.to_dict()``: episode
        metadata, the shots list and audio direction. Each shot entry comes
        from ``PuzzleBoxShot.to_dict()``.
    """
    lines = script_text.split("\n")

    ep_number, title = _read_episode_header(lines)
    metadata = _read_metadata(lines)
    is_fracture = (
        "fracture" in title.lower()
        or metadata.get("rhythm", "").lower() == "kinetic"
    )

    # ── Pipeline Direction ────────────────────────────────────────
    pipeline_start = _find_heading(lines, _PIPELINE_HEADING)
    shots: list = []
    audio_direction: dict = {}
    if pipeline_start is not None:
        shots, audio_direction = _read_pipeline(lines[pipeline_start + 1 :])

    # ── Narrative text ────────────────────────────────────────────
    narrative_text = ""
    narrative_start = _find_heading(lines, _NARRATIVE_HEADING)
    if narrative_start is not None:
        # Compare against None explicitly (0 is a valid index) and only
        # stop at the pipeline heading when it comes after the narrative.
        if pipeline_start is not None and pipeline_start > narrative_start:
            narrative_end = pipeline_start
        else:
            narrative_end = len(lines)
        narrative_text = "\n".join(lines[narrative_start:narrative_end])

    episode = PuzzleBoxEpisode(
        episode_number=ep_number,
        title=title,
        metadata=metadata,
        shots=shots,
        narrative_text=narrative_text,
        audio_direction=audio_direction,
        is_fracture=is_fracture,
    )
    return episode.to_dict()