"""Episode script text loading and hashing helpers.

Camera-tested provenance stamps hash the normalized script text, not raw file
bytes. Keep that normalization here so writers and freshness readers stay
byte-identical.
"""
from __future__ import annotations

import hashlib
import re

from recoil.core.paths import ProjectPaths


def strip_metadata(text: str) -> str:
    """Return *text* with episode header metadata and divider lines removed.

    Filtering is line based (lines are split on ``"\\n"``) and matches against
    each line with surrounding whitespace stripped:

    * Dropped everywhere: ``---`` dividers and lines starting with
      ``[[EPISODE <n>``.
    * Dropped only before the body has started: blank lines, ``# Episode <n>``
      titles, bold ``**Word Count`` / ``**Dialogue`` / ``**Hook`` /
      ``**Cliffhanger`` lines, and ``title:`` / ``episode:`` / ``word_count:``
      / ``words:`` / ``cliffhanger:`` / ``hook:`` key lines (the last two
      groups are matched case-insensitively).
    * The body starts at the first remaining line that begins with ``INT.`` or
      ``EXT.``, looks like a ``# [m:ss`` timed marker, or is longer than ten
      characters. Shorter preamble lines that do not qualify are dropped.

    Once the body has started, every line that is not dropped is kept verbatim
    (original indentation included). The kept lines are re-joined with
    ``"\\n"`` and the result is stripped of leading/trailing whitespace.
    """
    episode_marker = re.compile(r"^\[\[EPISODE\s+\d+")
    header_patterns = (
        re.compile(r"^#\s+Episode\s+\d+"),
        re.compile(r"^\*\*(Word Count|Dialogue|Hook|Cliffhanger)", re.IGNORECASE),
        re.compile(r"^(title|episode|word_count|words|cliffhanger|hook):", re.IGNORECASE),
    )
    timed_marker = re.compile(r"^#\s*\[\d+:\d+")

    kept: list[str] = []
    body_started = False
    for raw in text.split("\n"):
        bare = raw.strip()

        # Dividers and episode markers are removed no matter where they occur.
        if bare == "---" or episode_marker.match(bare):
            continue

        if not body_started:
            # Still in the header region: skip blanks and known metadata lines.
            if not bare or any(pattern.match(bare) for pattern in header_patterns):
                continue
            # Decide whether this line opens the narrative body.
            body_started = (
                bare.startswith(("INT.", "EXT."))
                or timed_marker.match(bare) is not None
                or len(bare) > 10
            )
            if not body_started:
                # Short, unrecognized preamble line: discard it.
                continue

        kept.append(raw)

    return "\n".join(kept).strip()


def load_episode_script_text(project: str, episode: int) -> str:
    """Read an episode's script file and return its normalized text.

    The file name is ``ep_NNN.md`` (episode number zero-padded to three
    digits). It is looked up under ``paths.episodes_dir`` first, then under
    ``paths.project_root / "episodes"`` as a legacy fallback. The file is read
    as UTF-8 and passed through ``strip_metadata``.

    Raises:
        FileNotFoundError: if the file exists in neither location. The
            message names the primary (non-legacy) path.
    """
    paths = ProjectPaths.for_project(project)
    filename = f"ep_{episode:03d}.md"

    script_path = paths.episodes_dir / filename
    if not script_path.exists():
        # Fall back to the legacy location before giving up.
        fallback = paths.project_root / "episodes" / filename
        if not fallback.exists():
            raise FileNotFoundError(f"Episode script not found: {script_path}")
        script_path = fallback

    raw_text = script_path.read_text(encoding="utf-8")
    return strip_metadata(raw_text)


def episode_script_text_sha(script_text: str) -> str:
    """Return the hex digest of already-normalized script text.

    Despite the "sha" in the name, the digest is MD5 computed over the UTF-8
    encoding of *script_text*.
    """
    hasher = hashlib.md5()
    hasher.update(script_text.encode("utf-8"))
    return hasher.hexdigest()


def episode_script_sha(project: str, episode: int) -> str:
    """Hash the episode's script as it currently exists on disk.

    Loads the normalized text via ``load_episode_script_text`` and hashes it
    with ``episode_script_text_sha``; any exception from the load propagates.
    """
    normalized_text = load_episode_script_text(project, episode)
    return episode_script_text_sha(normalized_text)


# Public API: the names exported by a star-import of this module.
__all__ = [
    "strip_metadata",
    "load_episode_script_text",
    "episode_script_text_sha",
    "episode_script_sha",
]
