"""Extract per-character visual grammars from characters.md.

Reads the Visual Grammar section of a project's characters.md
and produces a dict mapping character names to lens language strings.

The grammar data feeds into the prompt engine's grammar injection layer.
"""

from __future__ import annotations

import re
from pathlib import Path


def extract_grammars(characters_md_path: str | Path) -> dict[str, str]:
    """Extract per-character lens grammar strings from characters.md.

    Scans the ``## Visual Grammar`` section (heading matched
    case-insensitively, ending at the next level-2 heading) for
    per-character subsections of the form ``### Sadie's Grammar: ...``.
    Inside each subsection the values of the ``**Lens:**``,
    ``- Color temperature:`` and ``- Camera:`` lines are collected in
    document order, stripped of surrounding whitespace and trailing
    periods, and joined with ", ".

    Args:
        characters_md_path: Path to the project's characters.md file.

    Returns:
        Dict mapping character name (uppercase) to lens grammar string.
        Example: {"SADIE": "85mm telephoto, shallow depth of field, cool cyan"}
        An empty dict is returned when the path is not an existing regular
        file, when there is no Visual Grammar section, or when no character
        yields any recognised field. Characters without any recognised
        field are omitted. If a character appears twice, the later
        subsection wins.
    """
    path = Path(characters_md_path)
    # is_file() rather than exists(): a directory path must yield {} like a
    # missing file does, instead of raising when we try to read it.
    if not path.is_file():
        return {}

    # Compiled once up front; every pattern is applied to every line.
    section_start = re.compile(r"^##\s+Visual Grammar", re.IGNORECASE)
    # A level-2 heading (exactly "##", not "###") closes the section.
    section_end = re.compile(r"^##\s+[^#]")
    # Header such as: ### Sadie's Grammar: In the Mood for Love
    # (straight or typographic apostrophe; single-word names only).
    char_header = re.compile(r"^###\s+(\w+)['\u2019]s\s+Grammar", re.IGNORECASE)
    field_patterns = (
        re.compile(r"^\*\*Lens:\*\*\s*(.+)"),
        re.compile(r"^-\s+Color temperature:\s*(.+)", re.IGNORECASE),
        re.compile(r"^-\s+Camera:\s*(.+)", re.IGNORECASE),
    )

    grammars: dict[str, str] = {}
    current_char: str | None = None
    current_parts: list[str] = []
    in_grammar = False

    for line in path.read_text(encoding="utf-8").split("\n"):
        if section_start.match(line):
            in_grammar = True
            continue
        if not in_grammar:
            continue
        if section_end.match(line):
            break

        char_match = char_header.match(line)
        if char_match:
            # A new character begins: flush whatever the previous one collected.
            if current_char and current_parts:
                grammars[current_char] = ", ".join(current_parts)
            current_char = char_match.group(1).upper()
            current_parts = []
            continue

        if current_char:
            for pattern in field_patterns:
                field = pattern.match(line)
                if field:
                    current_parts.append(field.group(1).strip().rstrip("."))

    # Flush the final character, which has no following header to trigger it.
    if current_char and current_parts:
        grammars[current_char] = ", ".join(current_parts)

    return grammars


def extract_grammar(characters_md_text: str, character_name: str) -> dict:
    """Extract visual grammar for a single character from characters.md text.

    Unlike extract_grammars() which takes a file path and returns all grammars
    as strings, this returns a structured dict for one character suitable for
    the breakdown agent.

    The character's subsection starts at a ``### <Name>'s Grammar`` header
    whose name matches ``character_name`` and ends at the next character
    grammar header or at the next level-2 (``## ``) heading, whichever comes
    first, so fields from unrelated later sections are not picked up.

    Args:
        characters_md_text: Raw text content of characters.md.
        character_name: Character name to look up (case-insensitive).

    Returns:
        Dict with key ``primary_character`` (uppercased name) plus whichever
        of ``lens``, ``dof``, ``color_temp`` (strings, trailing periods
        removed) and ``bleed_factor`` (float) were present in the section.
        Empty dict if either argument is empty, the character is not found,
        or the character's section contains none of those fields.
    """
    if not characters_md_text or not character_name:
        return {}

    char_upper = character_name.upper()
    result: dict = {"primary_character": char_upper}

    # Header such as: ### Sadie's Grammar: ...
    header_re = re.compile(r"^###\s+(\w+)['\u2019]s\s+Grammar", re.IGNORECASE)
    # A level-2 heading closes the enclosing section.
    section_end_re = re.compile(r"^##\s+[^#]")
    text_fields = (
        ("lens", re.compile(r"^\*\*Lens:\*\*\s*(.+)")),
        ("dof", re.compile(r"^-\s+DOF:\s*(.+)", re.IGNORECASE)),
        ("color_temp", re.compile(r"^-\s+Color temperature:\s*(.+)", re.IGNORECASE)),
    )
    # Capture exactly one well-formed decimal number so a trailing sentence
    # period ("0.5.") or stray dots cannot make float() raise.
    bleed_re = re.compile(
        r"^-\s+Bleed factor:\s*(\d+(?:\.\d+)?|\.\d+)", re.IGNORECASE
    )

    in_section = False
    for line in characters_md_text.split("\n"):
        header = header_re.match(line)
        if header:
            if header.group(1).upper() == char_upper:
                in_section = True
            elif in_section:
                break  # reached the next character's section
            continue

        if not in_section:
            continue

        if section_end_re.match(line):
            break  # left the enclosing level-2 section

        for key, pattern in text_fields:
            found = pattern.match(line)
            if found:
                result[key] = found.group(1).strip().rstrip(".")

        bleed = bleed_re.match(line)
        if bleed:
            result["bleed_factor"] = float(bleed.group(1))

    # "primary_character" alone means nothing was extracted for this name.
    if len(result) > 1:
        return result
    return {}


def get_exposure_level(episode_number: int, total_episodes: int = 16) -> int:
    """Determine exposure level from episode number.

    A series is divided into 4 exposures, each covering one quarter of the
    episodes. With the default of 16 episodes (Puzzle Box) that gives:
    - Eps 1-4: Exposure 1 (THE MOOD)
    - Eps 5-8: Exposure 2 (THE FRACTURE)
    - Eps 9-12: Exposure 3 (THE SHIFT)
    - Eps 13-16: Exposure 4 (THE TRUTH)

    Args:
        episode_number: 1-based episode number. Values below 1 map to
            exposure 1 and values beyond ``total_episodes`` map to exposure 4.
        total_episodes: Number of episodes in the series. Non-positive
            values fall back to the default 16-episode layout.

    Returns:
        Exposure level in the range 1..4.
    """
    exposures = 4
    if total_episodes <= 0:
        # No meaningful series length: use the standard 16-episode layout
        # rather than dividing by zero or raising.
        total_episodes = 16

    # Integer ceiling of episode_number * exposures / total_episodes, i.e.
    # the 1-based quarter of the series in which the episode falls.
    level = -(-(episode_number * exposures) // total_episodes)
    return max(1, min(exposures, level))