#!/usr/bin/env python3
"""Breakdown Agent — translates narrative episodes into visual breakdowns.

Reads: episode.md + FORMAT.md + characters.md
Produces: breakdown.json (per-episode visual shot list)

The agent acts as a Director/DP: it reads the narrative script and makes
creative decisions about shot sizes, lens selection, lighting, blocking,
and audio design.

Architecture:
    1. Deterministic parser extracts structure (beats, VO text, characters)
    2. Grammar extractor reads per-character visual grammar from characters.md
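    3. LLM agent makes creative visual decisions per shot
       (not wired in yet: generate_breakdown() currently emits a stub breakdown)
    4. Output validated against breakdown_schema_v1.json
       (intended; no schema validation is performed in this file)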
    5. Look detector scans scripts for wardrobe/appearance changes across episodes
"""
import json
import re
from pathlib import Path
from typing import Optional

RECOIL_ROOT = Path(__file__).resolve().parent.parent

# --------------------------------------------------------------------------- #
#  Look Detection — wardrobe/appearance change indicators in episode scripts   #
# --------------------------------------------------------------------------- #

# Vocabulary hinting that a character's look has changed in the script text.
# The three themed sets stay separate because detect_look_changes() names a
# look after the category of the keyword that triggered it; they are also
# merged into one public set when the module is imported.
_WARDROBE_KEYWORDS = {
    # phrases about getting dressed
    "wearing",
    "changed into",
    "now in",
    "dressed in",
    "puts on",
    "switched to",
    "donned",
    "stripped down to",
    "outfitted in",
    # garments and gear
    "suit",
    "armor",
    "armored",
    "uniform",
    "jumpsuit",
    "vest",
    "helmet",
    "cloak",
    "disguise",
    "disguised",
}
_INJURY_KEYWORDS = {
    "blood",
    "bloody",
    "bleeding",
    "wound",
    "wounded",
    "torn",
    "burned",
    "burn marks",
    "scarred",
    "bandaged",
    "limping",
    "bruised",
    "stitches",
    "sling",
    "cast",
    "injured",
}
_TRANSFORMATION_KEYWORDS = {
    "transformed",
    "mutated",
    "cybernetic",
    "augmented",
    "prosthetic",
    "implant",
    "graft",
    "modified",
}

LOOK_CHANGE_INDICATORS = _WARDROBE_KEYWORDS | _INJURY_KEYWORDS | _TRANSFORMATION_KEYWORDS

# One case-insensitive, whole-word alternation over every indicator. The
# alternatives are ordered longest-first so multi-word phrases are tried
# before the shorter keywords.
_INDICATOR_PATTERN = re.compile(
    r'\b(%s)\b' % '|'.join(
        map(re.escape, sorted(LOOK_CHANGE_INDICATORS, key=len, reverse=True))
    ),
    re.IGNORECASE,
)


def detect_look_changes(
    episode_texts: dict[int, str],
    character_name: str,
) -> list[dict]:
    """Propose per-episode "looks" for one character from script keywords.

    Heuristic: in every episode that mentions the character (whole-word,
    case-insensitive), each mention is inspected together with the 200
    characters on either side. The first look-change indicator found in that
    window, if any, is recorded as a change event for the episode. Events are
    then folded into looks: a "base" look for the episodes before the first
    event, followed by one look per episode range that starts with an event.

    Args:
        episode_texts: Mapping of episode number to the full script text
                       for that episode.
        character_name: Character name to scan for (case-insensitive).

    Returns:
        A list of look dicts, or [] when an argument is empty or the
        character never appears. Each dict has:
          - name: str  ("base", "combat", or "<category>_epNN")
          - description: str  (whitespace-collapsed snippet around the
                               indicator; empty for the base look)
          - episodes: list[int]  (episodes assigned to this look)
          - trigger: str | None  ("<keyword> in epNN", or None for base)

        When the very first episode the character appears in already carries
        an indicator, that episode is listed in both the base look and the
        first change look.
    """
    if not (episode_texts and character_name):
        return []

    name_re = re.compile(r'\b' + re.escape(character_name) + r'\b', re.IGNORECASE)

    def _look_name(keyword: str, ep_num: int) -> str:
        # Category checks run in a fixed order: injury, transformation,
        # combat gear, disguise, then generic wardrobe.
        if keyword in _INJURY_KEYWORDS:
            return f"injured_ep{ep_num:02d}"
        if keyword in _TRANSFORMATION_KEYWORDS:
            return f"transformed_ep{ep_num:02d}"
        if keyword in {"armor", "armored", "helmet", "vest"}:
            return "combat"
        if keyword in {"disguise", "disguised"}:
            return f"disguise_ep{ep_num:02d}"
        return f"wardrobe_ep{ep_num:02d}"

    # Pass 1: walk the episodes in numeric order, remembering where the
    # character appears and every (episode, keyword, snippet) change event.
    appearing: list[int] = []
    events: list[tuple[int, str, str]] = []

    for ep_num in sorted(episode_texts):
        script = episode_texts[ep_num]
        if name_re.search(script) is None:
            continue
        appearing.append(ep_num)

        for mention in name_re.finditer(script):
            lo = max(0, mention.start() - 200)
            hi = min(len(script), mention.end() + 200)
            window = script[lo:hi]

            # Only the first indicator inside a mention's window counts.
            hit = _INDICATOR_PATTERN.search(window)
            if hit is None:
                continue

            snippet_lo = max(0, hit.start() - 60)
            snippet_hi = min(len(window), hit.end() + 60)
            snippet = window[snippet_lo:snippet_hi].strip()
            events.append((ep_num, hit.group(0).lower(), re.sub(r'\s+', ' ', snippet)))

    if not appearing:
        return []

    base_look = {
        "name": "base",
        "description": "",
        "episodes": appearing,
        "trigger": None,
    }
    if not events:
        # Nothing ever changes: the base look spans every appearance.
        return [base_look]

    # Events are already in ascending episode order, so the first one marks
    # the earliest change. The base look covers what precedes it, or just the
    # first appearance when nothing precedes it.
    first_change = events[0][0]
    before_change = [ep for ep in appearing if ep < first_change]
    base_look["episodes"] = before_change if before_change else [appearing[0]]

    looks: list[dict] = [base_look]
    covered_through = base_look["episodes"][-1]

    # Pass 2: open a new look at each event that falls beyond the range
    # already covered; events inside the current range are absorbed.
    for ep_num, keyword, context in events:
        current = looks[-1]
        if current is not base_look and ep_num <= covered_through:
            if ep_num not in current["episodes"]:
                current["episodes"].append(ep_num)
            continue

        # The look lasts until just before the next later change episode,
        # or through the character's final appearance.
        upcoming = next((e for e, _, _ in events if e > ep_num), None)
        last_ep = appearing[-1] if upcoming is None else upcoming - 1

        looks.append({
            "name": _look_name(keyword, ep_num),
            "description": context,
            "episodes": [ep for ep in appearing if ep_num <= ep <= last_ep],
            "trigger": f"{keyword} in ep{ep_num:02d}",
        })
        covered_through = last_ep

    return looks


def extract_character_looks(
    bible_characters: dict,
    episode_texts: Optional[dict[int, str]] = None,
) -> dict:
    """Add looks[] to each character entry in a bible characters dict.

    If episode_texts are provided, detect_look_changes() is run for every
    character. When detection yields nothing (no scripts given, or the
    character is never mentioned in them), a single "base" look is built from
    the bible data instead, so every processed character ends up with at
    least one look. Without this fallback an unmentioned character would be
    stored with an empty looks[] and then skipped on every later run.

    This is ADDITIVE — the flat 'appearance' field is preserved for backward
    compatibility. The dict is modified in place.

    Args:
        bible_characters: The characters dict from a global bible, keyed by
                          character name/id. Values that are not dicts are
                          left untouched. The base description is taken from
                          the first non-empty field among
                          'wardrobe_description', 'clothing', 'appearance'.
        episode_texts:    Optional mapping of episode number to script text.
                          When provided, enables cross-episode look detection.

    Returns:
        The same dict object with looks[] added to each character entry that
        did not already have a 'looks' key.
    """
    for char_name, char_data in bible_characters.items():
        if not isinstance(char_data, dict):
            continue

        # Already has looks — skip (human-edited or prior run)
        if "looks" in char_data:
            continue

        # Extract base appearance from existing fields
        base_desc = (
            char_data.get("wardrobe_description", "")
            or char_data.get("clothing", "")
            or char_data.get("appearance", "")
        )

        looks = detect_look_changes(episode_texts, char_name) if episode_texts else []

        if looks:
            # Detection leaves the base description empty; fill it from the bible.
            if looks[0]["name"] == "base" and not looks[0]["description"]:
                looks[0]["description"] = base_desc
        else:
            bible_eps = char_data.get("episodes", [])
            if isinstance(bible_eps, list):
                # Copy so later edits to the look cannot silently change the
                # character's own 'episodes' list (and vice versa).
                bible_eps = list(bible_eps)
            elif not bible_eps:
                bible_eps = []
            looks = [{
                "name": "base",
                "description": base_desc,
                "episodes": bible_eps,
                "trigger": None,
            }]

        char_data["looks"] = looks

    return bible_characters


def generate_episode_look_map(bible_characters: dict) -> dict:
    """Flatten each character's looks[] into an episode -> look-name table.

    Episode numbers become string keys. If several looks list the same
    episode, the look that comes later in looks[] wins. Characters that are
    not dicts, have no looks, or whose looks cover no episodes are omitted.
    A look without a 'name' is recorded as "base".

    Args:
        bible_characters: Characters dict with looks[] arrays.

    Returns:
        Dict like:
        {
          "JADE": {"1": "base", "2": "base", "8": "injured_ep08"},
          "KIT":  {"1": "base", ...}
        }
    """
    look_map: dict[str, dict[str, str]] = {}

    for char_name, char_data in bible_characters.items():
        if not isinstance(char_data, dict):
            continue

        episode_to_look = {
            str(ep): look.get("name", "base")
            for look in (char_data.get("looks", []) or [])
            for ep in look.get("episodes", [])
        }
        if episode_to_look:
            look_map[char_name] = episode_to_look

    return look_map


def write_episode_look_map(
    bible_characters: dict,
    output_dir: Path,
) -> Path:
    """Build the episode look map and save it as pretty-printed JSON.

    The destination is not output_dir itself: output_dir.parent is handed to
    ProjectPaths.from_root(), and the file location is whatever that object
    reports as `episode_look_map`. Its `visual_state_dir` is created first if
    it does not exist.

    Args:
        bible_characters: Characters dict with looks[] arrays.
        output_dir: Project assets/ directory (used to derive the project root).

    Returns:
        Path to the written file.
    """
    mapping = generate_episode_look_map(bible_characters)

    from recoil.core.paths import ProjectPaths

    project_paths = ProjectPaths.from_root(output_dir.parent)
    project_paths.visual_state_dir.mkdir(parents=True, exist_ok=True)

    destination = project_paths.episode_look_map
    serialized = json.dumps(mapping, indent=2, sort_keys=True)
    destination.write_text(serialized)

    print(f"Episode look map written: {destination}")
    return destination


def _parse_key_value_lines(section_text: str, slash_to_underscore: bool = False) -> dict:
    """Convert ``- Some Key: value`` lines into ``{'some_key': 'value'}``.

    Lines without a colon are ignored. Keys are stripped of leading dashes
    and spaces, lower-cased, and have spaces replaced by underscores (and
    slashes too when ``slash_to_underscore`` is set). Only the first colon
    splits key from value.
    """
    pairs = {}
    for line in section_text.strip().split('\n'):
        if ':' not in line:
            continue
        key, val = line.split(':', 1)
        key = key.strip().lstrip('- ').lower().replace(' ', '_')
        if slash_to_underscore:
            key = key.replace('/', '_')
        pairs[key] = val.strip()
    return pairs


def parse_episode_narrative(episode_path: Path) -> dict:
    """Parse a narrative-only episode into structured data.

    The file is read as UTF-8. Every section is optional; a section ends at
    the next markdown heading or at the end of the file (top-level metadata
    sections also end at a ``---`` rule), so a section that happens to be
    last in the file is still captured.

    Args:
        episode_path: Path to the episode markdown file. The file stem
                      (e.g. 'ep_01') becomes the episode id.

    Returns:
        {
            'episode_id': 'ep_01',
            'metadata': { exposure, sequence, rhythm, vo, ending_type, ... },
            'fragment': { recontextualizes, original_meaning, new_meaning, carrier },
            'the_moment': str,
            'beats': [
                { 'beat_id': 'entry_image', 'text': '...' },
                { 'beat_id': 'voice', 'vo_character': 'SADIE', 'vo_text': '...', 'stage_direction': '...' },
                { 'beat_id': 'linger', 'text': '...' },
                { 'beat_id': 'break', 'text': '...' },
                { 'beat_id': 'aftermath', 'text': '...' },
            ],
            'vote': { 'type': 'world|oracle', 'text': '...' }
        }

        Beats are listed in the fixed order shown above (not document
        order) and only when their heading is present. 'metadata' and
        'fragment' hold whatever keys the file contains. 'vote' is {} when
        neither vote section exists; ORACLE wins if both exist.

    Raises:
        OSError: if the file cannot be read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    content = episode_path.read_text(encoding="utf-8")

    # Lookaheads that terminate a section. "\n##" also covers "\n###".
    section_end = r'(?=\n##|\n---|\Z)'
    block_end = r'(?=\n##|\Z)'

    def body_of(heading: str, end: str) -> Optional[str]:
        """Return the stripped text under ``heading``, or None if absent."""
        found = re.search(heading + r'\n(.*?)' + end, content, re.DOTALL)
        return found.group(1).strip() if found else None

    # Episode ID from filename
    result = {'episode_id': episode_path.stem}

    # Metadata (keys additionally normalise '/' to '_')
    meta_text = body_of(r'## Metadata', section_end)
    result['metadata'] = (
        _parse_key_value_lines(meta_text, slash_to_underscore=True)
        if meta_text is not None else {}
    )

    # Fragment
    frag_text = body_of(r'## FRAGMENT', section_end)
    result['fragment'] = _parse_key_value_lines(frag_text) if frag_text is not None else {}

    # THE MOMENT
    moment_text = body_of(r'## THE MOMENT', section_end)
    result['the_moment'] = moment_text if moment_text is not None else ''

    # Beats
    beats = []

    entry_text = body_of(r'### [^\n]*ENTRY IMAGE[^\n]*', block_end)
    if entry_text is not None:
        beats.append({'beat_id': 'entry_image', 'text': entry_text})

    voice_text = body_of(r'### [^\n]*VOICE[^\n]*', block_end)
    if voice_text is not None:
        vo_char_match = re.search(r'\[VO:\s*(\w+)\]', voice_text)
        vo_text_match = re.search(r'"([^"]*)"', voice_text, re.DOTALL)
        beats.append({
            'beat_id': 'voice',
            'vo_character': vo_char_match.group(1) if vo_char_match else '',
            'vo_text': vo_text_match.group(1) if vo_text_match else '',
            # Whatever remains after removing the [VO: X] tag and its quoted line
            'stage_direction': re.sub(r'\[VO:.*?\]\n"[^"]*"', '', voice_text).strip()
        })

    # LINGER, then BREAK / AFTERMATH (FRACTURE episodes)
    for beat_id, heading in (
        ('linger', r'### [^\n]*LINGER[^\n]*'),
        ('break', r'### [^\n]*BREAK[^\n]*'),
        ('aftermath', r'### [^\n]*AFTERMATH[^\n]*'),
    ):
        beat_text = body_of(heading, block_end)
        if beat_text is not None:
            beats.append({'beat_id': beat_id, 'text': beat_text})

    result['beats'] = beats

    # Vote: an ORACLE section takes precedence over a WORLD VOTE section
    vote = {}
    world_text = body_of(r'## WORLD VOTE', block_end)
    oracle_text = body_of(r'## ORACLE', block_end)
    if oracle_text is not None:
        vote['type'] = 'oracle'
        vote['text'] = oracle_text
    elif world_text is not None:
        vote['type'] = 'world'
        vote['text'] = world_text
    result['vote'] = vote

    return result


def build_breakdown_prompt(parsed_episode: dict, format_md: str, characters_md: str,
                           grammar: dict) -> str:
    """Assemble the Director/DP prompt text for one parsed episode.

    The prompt has three parts, concatenated in order: a header with the
    episode id and selected metadata, one section per narrative beat, and a
    footer holding the character grammar values, the format rules and the
    output instructions.

    Args:
        parsed_episode: Dict with 'episode_id', 'metadata' and 'beats' keys
                        (the shape produced by parse_episode_narrative()).
        format_md: Accepted for interface compatibility; not read here —
                   the format rules in the prompt are fixed text.
        characters_md: Accepted for interface compatibility; not read here.
        grammar: Per-character visual grammar; missing keys render as empty
                 strings (bleed_factor as 0).

    Returns:
        The complete prompt string.
    """
    ep_id = parsed_episode['episode_id']
    metadata = parsed_episode['metadata']
    beats = parsed_episode['beats']

    header = f"""You are the Director of Photography for AFTERIMAGE, a cyberpunk mood-mystery microserial.

Your job: translate this narrative episode into a visual breakdown (shot list with technical specs).

## The Episode: {ep_id}
Exposure: {metadata.get('exposure', '')}
Rhythm: {metadata.get('rhythm', '')}
Ending Type: {metadata.get('ending_type', '')}

## Narrative Beats:
"""
    pieces = [header]

    for beat in beats:
        pieces.append(f"\n### {beat['beat_id'].upper()}\n")

        # Every beat except the voice beat is plain narrative text.
        if beat['beat_id'] != 'voice':
            pieces.append(f"{beat.get('text', '')}\n")
            continue

        pieces.append(f"VO Character: {beat.get('vo_character', '')}\n")
        pieces.append(f"VO Text: \"{beat.get('vo_text', '')}\"\n")
        direction = beat.get('stage_direction')
        if direction:
            pieces.append(f"Stage Direction: {direction}\n")

    footer = f"""
## Character Visual Grammar (from characters.md):
Primary character this episode: {grammar.get('primary_character', '')}
Lens: {grammar.get('lens', '')}
DOF: {grammar.get('dof', '')}
Color temperature: {grammar.get('color_temp', '')}
Bleed factor: {grammar.get('bleed_factor', 0)} (0 = pure character grammar, 1 = fully blended)

## Format Rules (from FORMAT.md):
- ENTRY IMAGE: 2-5 seconds, wordless, NO characters speaking
- VOICE: 16-20 seconds, 3-4 shots, VO plays over visuals
- LINGER: 6-8 seconds, held shot, unresolved
- Shot count per episode: 2-7 shots total
- Composition priorities: faces, negative space, texture, single characters
- VO ORTHOGONALITY: visuals must NOT literally depict what the VO describes

## Your Output:
Return a JSON object matching the breakdown schema. For each beat, specify:
- shot_id: "{ep_id}_{{beat}}_{{number}}" (e.g., "ep_01_ei_01")
- shot_type: ECU, CU, MCU, MS, MLS, LS, WS, EWS
- lens: from character grammar (or blended if bleed_factor > 0)
- dof: shallow or deep (from character grammar)
- color_temp: from character grammar
- lighting: describe the light sources and direction
- camera_movement: static, drift_left, drift_right, push_in, pull_back, track
- action: what is physically happening in this shot
- characters: list of character IDs visible
- sfx: spot sound effects for this shot
- duration_s: seconds this shot holds

Also specify global_audio with ambient_bed, vo_character, vo_text, vo_delivery.

Return ONLY valid JSON. No markdown, no explanation.
"""
    pieces.append(footer)
    return "".join(pieces)


def generate_breakdown(episode_path: Path, project_path: Path,
                       format_name: str = 'puzzle_box',
                       dry_run: bool = False,
                       detect_looks: bool = False) -> dict:
    """Generate a visual breakdown for a single episode.

    The LLM call is not wired in yet: the breakdown is currently produced by
    _stub_breakdown(), and no schema validation is performed here. All text
    files are read as UTF-8, matching _load_all_episode_texts().

    Args:
        episode_path: Path to the narrative-only episode .md
        project_path: Path to the project root
        format_name: Format to use; FORMAT.md is looked up under
                     RECOIL_ROOT/formats/<format_name>/ and characters.md
                     under <project_path>/bible/. Missing files are treated
                     as empty text.
        dry_run: If True, return the prompt without generating or writing
                 a breakdown.
        detect_looks: If True, scan all episode scripts for character
                      wardrobe/appearance changes and output looks[] in
                      the bible character entries, plus write
                      episode_look_map.json via write_episode_look_map().
                      An existing global_bible.json is rewritten with the
                      added looks[].

    Returns:
        With dry_run: {'prompt': str, 'parsed_episode': dict, 'grammar': dict}.
        Otherwise: the breakdown dict, which is also written to
        <project_path>/state/visual/breakdowns/<episode_id>_breakdown.json.
        It carries an extra 'character_looks' key when detect_looks is set.
    """
    from visual.grammar_extractor import extract_grammar

    # 1. Parse the episode
    parsed = parse_episode_narrative(episode_path)

    # 2. Load format and character data
    format_md_path = RECOIL_ROOT / 'formats' / format_name / 'FORMAT.md'
    characters_md_path = project_path / 'bible' / 'characters.md'

    format_md = format_md_path.read_text(encoding="utf-8") if format_md_path.exists() else ''
    characters_md = (
        characters_md_path.read_text(encoding="utf-8") if characters_md_path.exists() else ''
    )

    # 3. Extract grammar for this episode's primary character
    #    (the first beat that names a VO character decides who that is)
    primary_char = ''
    for beat in parsed.get('beats', []):
        if beat.get('vo_character'):
            primary_char = beat['vo_character']
            break

    grammar = extract_grammar(characters_md, primary_char) if primary_char else {}

    # 4. Build the LLM prompt
    prompt = build_breakdown_prompt(parsed, format_md, characters_md, grammar)

    if dry_run:
        return {'prompt': prompt, 'parsed_episode': parsed, 'grammar': grammar}

    # 5. Call the LLM (Claude sub-agent or Gemini)
    # For now, return the parsed structure as a stub breakdown
    # The actual LLM call will be wired in during integration
    breakdown = _stub_breakdown(parsed, grammar)

    # 6. Look detection (optional)
    if detect_looks:
        # Load all episode scripts in the project for cross-episode scanning
        episode_texts = _load_all_episode_texts(project_path)

        # Load or build a bible characters dict. `bible` stays None when no
        # bible file was loaded, so the write-back below never depends on a
        # second filesystem check (the file could appear in between).
        bible_path = project_path / 'state' / 'visual' / 'global_bible.json'
        bible = None
        if bible_path.exists():
            bible = json.loads(bible_path.read_text(encoding="utf-8"))
            bible_characters = bible.get("characters", {})
        else:
            # Build a minimal characters dict from the breakdown
            bible_characters = {}
            for beat_entry in breakdown.get("beats", []):
                for shot in beat_entry.get("shots", []):
                    for char_id in shot.get("characters", []):
                        if char_id and char_id not in bible_characters:
                            bible_characters[char_id] = {
                                "char_id": char_id,
                                "appearance": "",
                                "episodes": [],
                            }

        # Run look detection and add looks[] to characters
        extract_character_looks(bible_characters, episode_texts)

        # Write episode_look_map.json
        assets_dir = project_path / 'assets'
        write_episode_look_map(bible_characters, assets_dir)

        # Attach looks summary to breakdown output
        breakdown["character_looks"] = {
            name: data.get("looks", [])
            for name, data in bible_characters.items()
            if isinstance(data, dict) and data.get("looks")
        }

        # Update bible on disk if it was loaded from disk
        if bible is not None:
            bible["characters"] = bible_characters
            bible_path.write_text(json.dumps(bible, indent=2), encoding="utf-8")
            print(f"Bible updated with looks[]: {bible_path}")

    # 7. Save breakdown
    output_dir = project_path / 'state' / 'visual' / 'breakdowns'
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{parsed['episode_id']}_breakdown.json"
    output_path.write_text(json.dumps(breakdown, indent=2), encoding="utf-8")

    print(f"Breakdown written: {output_path}")
    return breakdown


def _load_all_episode_texts(project_path: Path) -> dict[int, str]:
    """Load all episode script texts from a project directory.

    Scans the ``*.md`` files directly inside the project's episodes
    directory (``ProjectPaths(project_root=project_path).episodes_dir``) in
    sorted filename order. The episode number is taken from the first
    ``ep<N>``, ``ep_<N>`` or ``ep-<N>`` occurrence in the file stem
    (case-insensitive); files without one are ignored. When several files
    map to the same number, the first one that loads successfully wins.

    Loading is best-effort: a file that cannot be read or is not valid
    UTF-8 is skipped with a warning on stderr instead of aborting the scan.

    Returns:
        Dict mapping episode number (int) to the full text of the script.
        Empty when the directory does not exist.
    """
    import sys

    from recoil.core.paths import ProjectPaths

    ppaths = ProjectPaths(project_root=project_path)
    search_dirs = [
        ppaths.episodes_dir,
    ]

    ep_num_pattern = re.compile(r'ep[_-]?(\d+)', re.IGNORECASE)
    episode_texts: dict[int, str] = {}

    for search_dir in search_dirs:
        if not search_dir.is_dir():
            continue
        for md_file in sorted(search_dir.glob("*.md")):
            match = ep_num_pattern.search(md_file.stem)
            if match is None:
                continue
            ep_num = int(match.group(1))
            if ep_num in episode_texts:
                continue
            try:
                episode_texts[ep_num] = md_file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as exc:
                # Keep scanning, but leave a trace of what was dropped.
                print(
                    f"Warning: skipping unreadable episode script {md_file}: {exc}",
                    file=sys.stderr,
                )

    return episode_texts


def _stub_breakdown(parsed: dict, grammar: dict) -> dict:
    """Build a placeholder breakdown with one default shot per beat.

    Used in place of an LLM-generated breakdown. Each parsed beat becomes a
    breakdown beat holding a single static shot whose duration and shot size
    depend only on the beat id, and whose lens / DOF / colour temperature
    come from the character grammar (falling back to '50mm', 'shallow' and
    'neutral'). Shot numbers count up across the whole episode.

    Args:
        parsed: Parsed episode dict; must contain 'episode_id', may contain
                'beats'.
        grammar: Character visual grammar dict (may be empty).

    Returns:
        Dict with 'episode_id', 'format', 'grammar', 'global_audio' and
        'beats' keys.
    """
    ep_id = parsed['episode_id']
    source_beats = parsed.get('beats', [])

    # Default hold time in seconds per known beat; anything else gets 10.
    default_durations = {
        'entry_image': 4,
        'voice': 18,
        'linger': 8,
        'break': 5,
        'aftermath': 25,
    }

    beats = []
    for shot_number, beat in enumerate(source_beats, start=1):
        beat_id = beat['beat_id']
        duration = default_durations.get(beat_id, 10)

        if beat_id == 'voice':
            shot_type = 'MS'
        elif beat_id in ('entry_image', 'linger'):
            shot_type = 'ECU'
        else:
            shot_type = 'WS'

        speaker = beat.get('vo_character')
        shot = {
            # The beat id is abbreviated to its first two characters.
            'shot_id': f"{ep_id}_{beat_id[:2]}_{shot_number:02d}",
            'shot_type': shot_type,
            'lens': grammar.get('lens', '50mm'),
            'dof': grammar.get('dof', 'shallow'),
            'color_temp': grammar.get('color_temp', 'neutral'),
            'lighting': 'neon ambient',
            'camera_movement': 'static',
            # Narrative text when the beat has it, otherwise the VO line.
            'action': beat.get('text', beat.get('vo_text', '')),
            'characters': [speaker.lower()] if speaker else [],
            'sfx': [],
            'duration_s': duration
        }

        beats.append({
            'beat_id': beat_id,
            'duration_s': duration,
            'shots': [shot]
        })

    # Global audio is driven by the first beat that names a VO character.
    vo_beat = next((b for b in source_beats if b.get('vo_character')), {})
    global_audio = {
        'ambient_bed': 'Low city hum, neon buzz',
        'vo_character': vo_beat.get('vo_character', ''),
        'vo_text': vo_beat.get('vo_text', ''),
        'vo_delivery': 'Overheard memory register'
    }

    return {
        'episode_id': ep_id,
        'format': 'puzzle_box',
        'grammar': grammar,
        'global_audio': global_audio,
        'beats': beats
    }


def _main() -> None:
    """Command-line entry point.

    Parses the arguments, runs generate_breakdown(), then prints either the
    prompt (with --dry-run) or the resulting breakdown as indented JSON.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Generate visual breakdown from narrative episode')
    cli.add_argument('episode', help='Path to episode .md file')
    cli.add_argument('project', help='Path to project root')
    cli.add_argument('--format', default='puzzle_box', help='Format name')
    cli.add_argument('--dry-run', action='store_true', help='Show prompt without calling LLM')
    cli.add_argument('--detect-looks', action='store_true',
                     help='Scan episode scripts for wardrobe/appearance changes '
                          'and output looks[] per character + episode_look_map.json')
    options = cli.parse_args()

    outcome = generate_breakdown(
        Path(options.episode),
        Path(options.project),
        format_name=options.format,
        dry_run=options.dry_run,
        detect_looks=options.detect_looks,
    )

    # A dry run yields the prompt bundle; a real run yields the breakdown.
    if options.dry_run:
        print(outcome['prompt'])
        return
    print(json.dumps(outcome, indent=2))


if __name__ == '__main__':
    _main()
