#!/usr/bin/env python3
"""
Episode & Treatment Compiler for Recoil
Combines individual episode files or treatment into .fountain format.

Usage:
    python compile_episodes.py <project_path> [output_name]
    python compile_episodes.py <project_path> --treatment
    python compile_episodes.py <project_path> --metadata

Examples:
    python compile_episodes.py /path/to/leviathan
    python compile_episodes.py /path/to/leviathan MY_SERIES_FINAL.fountain
    python compile_episodes.py /path/to/leviathan --treatment
    python compile_episodes.py /path/to/leviathan --metadata

Episode mode (default):
1. Find all ep_XXX.md files in the project's episodes/ folder
2. Strip metadata (word counts, loglines, cliffhanger annotations, etc.)
3. Combine them into a single .fountain file with page breaks
4. Output to the project root folder

Metadata mode (--metadata):
1. Same as episode mode, but PRESERVES:
   - Episode titles (centered heading)
   - Cliffhanger type annotations (centered at end)
2. Output: [TITLE]_WITH_METADATA.fountain

Treatment mode (--treatment):
1. Read treatment.md from project root
2. Extract episode titles, prose paragraphs, and cliffhangers
3. Strip all metadata (threads, beat types, THE MOMENT, etc.)
4. Output to treatment_reader.fountain
"""

import re
import sys
from pathlib import Path
from datetime import date


def _clean_episode_text(content: str) -> tuple[str, str]:
    """
    Reduce the raw text of an episode file to clean Fountain content.

    Returns (episode_title, clean_content). The title is the literal
    ``[[EPISODE N: ...]]`` note found in the text, or "UNTITLED" when
    there is none. The content always ends with exactly one newline.
    """
    # Step 1: Prefer the body of a ```fountain fenced block (Format A).
    fountain_match = re.search(r'```fountain\s*\n(.*?)\n```', content, re.DOTALL)

    if fountain_match:
        content = fountain_match.group(1)
    else:
        # Format B: no fence, so start at the first [[EPISODE note.
        # If there is no such note either, the whole text is kept.
        episode_start = re.search(r'(\[\[EPISODE.*)', content, re.DOTALL)
        if episode_start:
            content = episode_start.group(1)

    # Step 2: Extract episode title
    title_match = re.search(r'\[\[EPISODE \d+:.*?\]\]', content)
    episode_title = title_match.group(0) if title_match else "UNTITLED"

    # Step 3: Remove the header metadata block that sits between the title
    # and the first '---'. The block is limited to consecutive '**...' lines
    # (blank lines allowed) that are immediately followed by '---'. An
    # unbounded '.*?\n---' would, when the header separator is missing, run
    # on to the trailing '---' and delete the whole episode body.
    content = re.sub(
        r'^(\[\[EPISODE[^\]]+\]\])\s*\n'
        r'(?:[ \t]*\*\*[^\n]*\n(?:[ \t]*\n)*)+'
        r'[ \t]*---',
        r'\1\n', content, flags=re.IGNORECASE)

    # Step 4: Remove a LOGLINE line (optionally written as **LOGLINE:**)
    # together with one following blank line, if there is one. Anchoring to
    # the start of a line keeps text that merely contains "LOGLINE:" intact.
    content = re.sub(r'^[ \t]*(?:\*\*)?LOGLINE:[^\n]*\n?(?:[ \t]*\n)?', '',
                     content, flags=re.MULTILINE)

    # Step 5: Remove trailing metadata: everything from a CLIFFHANGER TYPE
    # or NEXT marker to the end, with or without a preceding '---'.
    content = re.sub(r'\n---\s*\n\*\*CLIFFHANGER TYPE:.*$', '', content, flags=re.DOTALL)
    content = re.sub(r'\n---\s*\n\*\*NEXT:.*$', '', content, flags=re.DOTALL)
    content = re.sub(r'\n\*\*CLIFFHANGER TYPE:.*$', '', content, flags=re.DOTALL)
    content = re.sub(r'\n\*\*NEXT:.*$', '', content, flags=re.DOTALL)

    # Step 6: Remove a stray ``` marker at the very end or very start.
    content = re.sub(r'```\s*$', '', content)
    content = re.sub(r'^```\w*\s*\n', '', content)

    # Step 7: Normalise surrounding whitespace to a single trailing newline.
    content = content.strip() + '\n'

    return episode_title, content


def extract_episode_content(file_path: Path) -> tuple[str, str]:
    """
    Extract clean fountain content from an episode file.
    Returns (episode_title, clean_content)

    The file is read as UTF-8. Header metadata (a block of ``**Key:**``
    lines closed by ``---``), a LOGLINE line, trailing CLIFFHANGER TYPE /
    NEXT annotations and stray code-fence markers are removed.

    Handles two formats:

    Format A (code block wrapped):
    ```
    # Episode X: Title
    **Word Count:** ...
    ---
    ```fountain
    [[EPISODE X: TITLE]]
    LOGLINE: ...
    [content]
    ```
    ---
    **CLIFFHANGER TYPE:** ...
    ```

    Format B (direct fountain - olympus style):
    ```
    [[EPISODE X: TITLE]]
    **Word Count:** ...
    **Dialogue:** ...
    ---
    [content]
    ---
    **CLIFFHANGER TYPE:** ...
    ```

    If a Format B header is not closed by ``---`` it is left in place
    rather than guessed at, so episode text is never dropped.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        raw_text = f.read()

    return _clean_episode_text(raw_text)


def get_series_title(project_path: Path) -> str:
    """
    Return the series title for a project.

    The title is taken from the first level-1 Markdown heading
    (``# Title``) in ``bible/series_bible.md``. Anything after an em-dash,
    or after a hyphen that stands on its own between spaces
    (``# Title - Series Bible``), is treated as a subtitle and dropped.
    Hyphens inside a word (``Spider-Man``) are kept.

    Falls back to the upper-cased project folder name when the bible file
    is missing or contains no usable heading.
    """
    bible_path = project_path / 'bible' / 'series_bible.md'
    if bible_path.is_file():
        with open(bible_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # '(?!#)' rejects '##' and deeper headings; '[ \t]*' (not '\s*')
        # keeps the match on the heading's own line; '\S' requires real text.
        match = re.search(
            r'^#(?!#)[ \t]*(\S.*?)(?:[ \t]*—|[ \t]+-(?=\s|$)|[ \t]*$)',
            content, re.MULTILINE)
        if match:
            return match.group(1).strip()

    # Fall back to folder name
    return project_path.name.upper()


def _parse_treatment_body(body: str) -> tuple[str, str]:
    """
    Pull the prose and the cliffhanger out of one episode's treatment body.

    Returns (prose, cliffhanger). Prose is every non-metadata line joined
    with single spaces; cliffhanger is the text inside
    ``**[CLIFFHANGER: ...]**`` (the last one wins). Either may be "".
    """
    # Line prefixes that mark metadata, separators, or act/sequence headers.
    skipped_prefixes = (
        '**Sequence:',
        '**Threads:',
        '**THE MOMENT:',
        '**VOICE SEED:',
        '---',
        '## ',            # Act headers
        '### Sequence',   # Sequence headers
    )

    prose_lines = []
    cliffhanger = ""

    for line in body.strip().split('\n'):
        stripped = line.strip()
        if not stripped:
            continue

        # Stop at the validation metrics section. This must be tested
        # before the generic '## ' skip below: that prefix also matches
        # this header, which would otherwise make the stop unreachable and
        # let the rest of the document leak into the last episode's prose.
        if stripped.startswith('## VALIDATION METRICS'):
            break

        if stripped.startswith(skipped_prefixes):
            continue

        if stripped.startswith('**[CLIFFHANGER:'):
            cliff_match = re.match(r'\*\*\[CLIFFHANGER:\s*(.+?)\]\*\*', stripped)
            if cliff_match:
                cliffhanger = cliff_match.group(1).strip()
            continue

        # Anything left is prose.
        prose_lines.append(stripped)

    return ' '.join(prose_lines), cliffhanger


def compile_treatment(project_path: str) -> str:
    """
    Compile treatment.md into a readable .fountain file.
    Extracts only episode titles, prose paragraphs, and cliffhangers.

    Reads ``<project>/treatment.md`` and writes
    ``<project>/treatment_reader.fountain`` (overwriting any existing
    file), then prints a short summary.

    Returns the path to the compiled file.
    Raises FileNotFoundError if treatment.md does not exist.
    """
    project = Path(project_path).resolve()
    treatment_path = project / 'treatment.md'

    if not treatment_path.exists():
        raise FileNotFoundError(f"Treatment not found: {treatment_path}")

    with open(treatment_path, 'r', encoding='utf-8') as f:
        content = f.read()

    series_title = get_series_title(project)

    # Title page, closed by a page break.
    lines = [
        f"Title: {series_title} — Treatment",
        "Credit: A Microdrama Series",
        "Author: [Created with Claude]",
        f"Draft date: {date.today().isoformat()}",
        "",
        "===",
        "",
    ]

    # Splitting on a pattern with one capture group yields
    # [preamble, header, body, header, body, ...].
    episode_pattern = r'(### Episode \d+: "[^"]+"\s*(?:💔)?)'
    parts = re.split(episode_pattern, content)

    episode_count = 0
    for raw_header, body in zip(parts[1::2], parts[2::2]):
        title_match = re.match(r'### Episode (\d+): "([^"]+)"', raw_header.strip())
        if not title_match:
            continue

        ep_num, ep_title = title_match.group(1), title_match.group(2)
        episode_count += 1

        # Episode marker
        lines.append(f"[[EPISODE {ep_num}: {ep_title.upper()}]]")
        lines.append("")

        prose, cliffhanger = _parse_treatment_body(body)

        # Prose goes out as one line; the Fountain renderer wraps it.
        if prose:
            lines.append(prose)
            lines.append("")

        # Cliffhanger goes out on its own line with a leading '!'.
        if cliffhanger:
            lines.append(f"!{cliffhanger}")
            lines.append("")

    # Output path
    output_path = project / 'treatment_reader.fountain'

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

    print(f"\nCompiled {episode_count} episodes from treatment to:")
    print(f"  {output_path}")

    # Every entry in `lines` is a single output line here.
    total_chars = sum(len(line) for line in lines)
    print(f"  {len(lines):,} lines, {total_chars:,} characters")

    return str(output_path)


def extract_episode_with_metadata(file_path: Path) -> tuple[str, str, str]:
    """
    Extract content WITH title and cliffhanger preserved.
    Returns (episode_title, clean_content, cliffhanger_type)

    The title comes from the first Markdown heading of the form
    ``# Episode N: Title``. Files without that heading (direct-fountain
    episodes) fall back to the text of their ``[[EPISODE N: TITLE]]``
    note, so they still get a title. The title is "" only when neither is
    present; the cliffhanger is "" when there is no
    ``**CLIFFHANGER TYPE:**`` line.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Markdown heading: "# Episode N: Title"
    title_match = re.search(r'^# (Episode \d+: .+)$', content, re.MULTILINE)
    title = title_match.group(1).strip() if title_match else ""

    # Cliffhanger annotation: "**CLIFFHANGER TYPE:** <type>"
    cliff_match = re.search(r'\*\*CLIFFHANGER TYPE:\*\* (.+)$', content, re.MULTILINE)
    cliffhanger = cliff_match.group(1).strip() if cliff_match else ""

    # Clean body (and the [[EPISODE ...]] note) from the standard extractor.
    note_title, clean_content = extract_episode_content(file_path)

    # No Markdown heading: reuse the note text without its [[ ]] brackets.
    # "UNTITLED" is the extractor's placeholder for "no note found".
    if not title and note_title != "UNTITLED":
        title = note_title.strip('[]').strip()

    return title, clean_content, cliffhanger


def compile_episodes_with_metadata(project_path: str, output_name: str = None) -> str:
    """
    Compile episodes with titles and cliffhanger annotations preserved.

    Each episode is written as a centered title line (``> TITLE <``), the
    cleaned episode content, and a centered cliffhanger callout, with a
    ``===`` page break between episodes.

    project_path: project root containing an ``episodes/`` folder.
    output_name:  optional output file name inside the project root;
                  ``.fountain`` is appended if missing. Defaults to
                  ``<series title>_WITH_METADATA.fountain``.

    Returns the path to the compiled file.
    Raises FileNotFoundError if the episodes folder is missing or holds
    no ``ep_*.md`` files.
    """
    project = Path(project_path).resolve()
    episodes_dir = project / 'episodes'

    if not episodes_dir.exists():
        raise FileNotFoundError(f"Episodes directory not found: {episodes_dir}")

    def episode_order(path):
        # Natural sort key: digit runs compare as numbers, so ep_2 sorts
        # before ep_10 even without zero padding. re.split with a capture
        # group alternates text / digits, so odd positions are the numbers.
        tokens = re.split(r'(\d+)', path.name)
        numeric = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        return numeric, path.name  # name breaks ties such as ep_1 / ep_01

    # Find all episode files
    episode_files = sorted(episodes_dir.glob('ep_*.md'), key=episode_order)

    if not episode_files:
        raise FileNotFoundError(f"No episode files found in: {episodes_dir}")

    print(f"Found {len(episode_files)} episode files")

    series_title = get_series_title(project)

    # Title page, closed by a page break.
    lines = [
        f"Title: {series_title}",
        "Credit: A Microdrama Series",
        "Author: [Created with Claude]",
        f"Draft date: {date.today().isoformat()}",
        "",
        "===",
        "",
    ]

    last_index = len(episode_files) - 1
    for i, ep_file in enumerate(episode_files):
        print(f"Processing: {ep_file.name}")

        title, content, cliffhanger = extract_episode_with_metadata(ep_file)

        # Episode title as centered heading
        if title:
            lines.append(f"> {title.upper()} <")
            lines.append("")

        lines.append(content)

        # Cliffhanger callout, centered
        if cliffhanger:
            lines.append("")
            lines.append(f"> [CLIFFHANGER: {cliffhanger}] <")

        # Page break between episodes (not after the last one)
        if i < last_index:
            lines.append("")
            lines.append("===")
            lines.append("")

    # Determine output path
    if output_name:
        if not output_name.endswith('.fountain'):
            output_name += '.fountain'
        output_path = project / output_name
    else:
        output_path = project / f"{series_title}_WITH_METADATA.fountain"

    output_text = '\n'.join(lines)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(output_text)

    print(f"\nCompiled {len(episode_files)} episodes to:")
    print(f"  {output_path}")

    # Stats come from the written text: an episode's content is a single
    # multi-line entry in `lines`, so counting entries would under-report.
    total_lines = sum(1 for line in output_text.splitlines() if line.strip())
    total_chars = len(output_text)
    print(f"  {total_lines:,} lines, {total_chars:,} characters")

    return str(output_path)


def compile_episodes(project_path: str, output_name: str = None) -> str:
    """
    Main compilation function (clean, no metadata).

    Writes a title page followed by the cleaned content of every
    ``episodes/ep_*.md`` file, with a ``===`` page break between episodes.

    project_path: project root containing an ``episodes/`` folder.
    output_name:  optional output file name inside the project root;
                  ``.fountain`` is appended if missing. Defaults to
                  ``<series title>_COMPLETE.fountain``.

    Returns the path to the compiled file.
    Raises FileNotFoundError if the episodes folder is missing or holds
    no ``ep_*.md`` files.
    """
    project = Path(project_path).resolve()
    episodes_dir = project / 'episodes'

    if not episodes_dir.exists():
        raise FileNotFoundError(f"Episodes directory not found: {episodes_dir}")

    def episode_order(path):
        # Natural sort key: digit runs compare as numbers, so ep_2 sorts
        # before ep_10 even without zero padding. re.split with a capture
        # group alternates text / digits, so odd positions are the numbers.
        tokens = re.split(r'(\d+)', path.name)
        numeric = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        return numeric, path.name  # name breaks ties such as ep_1 / ep_01

    # Find all episode files
    episode_files = sorted(episodes_dir.glob('ep_*.md'), key=episode_order)

    if not episode_files:
        raise FileNotFoundError(f"No episode files found in: {episodes_dir}")

    print(f"Found {len(episode_files)} episode files")

    series_title = get_series_title(project)

    # Title page, closed by a page break.
    lines = [
        f"Title: {series_title}",
        "Credit: A Microdrama Series",
        "Author: [Created with Claude]",
        f"Draft date: {date.today().isoformat()}",
        "",
        "===",
        "",
    ]

    last_index = len(episode_files) - 1
    for i, ep_file in enumerate(episode_files):
        print(f"Processing: {ep_file.name}")

        # Only the cleaned content is used in this mode.
        _, content = extract_episode_content(ep_file)
        lines.append(content)

        # Page break between episodes (not after the last one)
        if i < last_index:
            lines.append("")
            lines.append("===")
            lines.append("")

    # Determine output path
    if output_name:
        if not output_name.endswith('.fountain'):
            output_name += '.fountain'
        output_path = project / output_name
    else:
        output_path = project / f"{series_title}_COMPLETE.fountain"

    output_text = '\n'.join(lines)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(output_text)

    print(f"\nCompiled {len(episode_files)} episodes to:")
    print(f"  {output_path}")

    # Stats come from the written text: an episode's content is a single
    # multi-line entry in `lines`, so counting entries would under-report.
    total_lines = sum(1 for line in output_text.splitlines() if line.strip())
    total_chars = len(output_text)
    print(f"  {total_lines:,} lines, {total_chars:,} characters")

    return str(output_path)


def main():
    """
    Command-line entry point.

    Arguments that start with ``--`` are flags and may appear anywhere;
    the remaining arguments are positional: the project path, then an
    optional output file name (ignored in treatment mode).

    ``--treatment`` selects treatment mode and takes precedence over
    ``--metadata``; with neither flag the clean episode mode runs.
    Other flags are ignored.

    Prints the module usage text and exits with status 1 when no project
    path is given. Any error raised while compiling is reported as
    ``Error: ...`` and also ends the process with status 1.
    """
    args = sys.argv[1:]

    # Separate positionals from flags so the flag may come before the
    # project path (e.g. `--treatment /path/to/project`).
    positional = [arg for arg in args if not arg.startswith('--')]

    if not positional:
        print(__doc__)
        sys.exit(1)

    project_path = positional[0]
    output_name = positional[1] if len(positional) > 1 else None

    try:
        if '--treatment' in args:
            compile_treatment(project_path)
        elif '--metadata' in args:
            # Metadata mode - preserve titles and cliffhangers
            compile_episodes_with_metadata(project_path, output_name)
        else:
            # Default episode mode (clean)
            compile_episodes(project_path, output_name)
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error: {e}")
        sys.exit(1)


# Run the command-line interface only when this file is executed as a
# script; importing it as a module just makes the functions available.
if __name__ == '__main__':
    main()
