#!/usr/bin/env python3
"""
Kill Box Format Validator

Validates episodes against Kill Box (V12) format constraints.
Core logic extracted from tools/episode_metrics.py.

Interface:
    validate_episode(episode_path, constants=None) -> dict
    validate_batch(episode_paths, constants=None) -> dict
"""

import re
from pathlib import Path


# =============================================================================
# CONSTANTS LOADING
# =============================================================================

def _parse_constants_from_md(filepath: Path) -> dict:
    """Parse a CONSTANTS.md file and extract its key/value table rows.

    A row is recognised by the shape ``| `NAME` | value |`` where NAME is
    made of uppercase letters and underscores. Each value cell is coerced:

    * ``"40%"``     -> ``40.0`` (float, percent sign dropped)
    * ``"450-500"`` -> kept as the string ``"450-500"``
    * ``"8"``       -> ``8`` (int); ``"1.5"`` -> ``1.5`` (float)
    * anything else -> the stripped cell text

    When ``WORD_COUNT`` is a string that begins with a numeric range,
    ``WORD_COUNT_MIN`` and ``WORD_COUNT_MAX`` are derived from it.

    Args:
        filepath: Location of the markdown file to read.

    Returns:
        Dict mapping constant names to their parsed values.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding (episode files are read as UTF-8 as well).
    content = filepath.read_text(encoding='utf-8')
    constants = {}

    table_pattern = r'\|\s*`([A-Z_]+)`\s*\|\s*([^|]+)\s*\|'
    for match in re.finditer(table_pattern, content):
        name = match.group(1).strip()
        value_str = match.group(2).strip()

        if value_str.endswith('%'):
            try:
                constants[name] = float(value_str.rstrip('%'))
            except ValueError:
                constants[name] = value_str
        elif '-' in value_str and all(p.strip().isdigit() for p in value_str.split('-')):
            constants[name] = value_str  # Keep a pure numeric range as a string
        else:
            # Try the narrowest numeric type first; fall back to the raw text.
            for cast in (int, float):
                try:
                    constants[name] = cast(value_str)
                    break
                except ValueError:
                    continue
            else:
                constants[name] = value_str

    # Derive min/max from the WORD_COUNT range. The range is matched with a
    # regex instead of calling int() on raw split parts, so cells such as
    # "450-500 words", "`450-500`" or "N/A - TBD" no longer raise ValueError.
    # An en dash is accepted as the range separator too. The lookahead stops
    # a decimal like "450-500.5" from being read as an integer range.
    wc = constants.get('WORD_COUNT', '')
    if isinstance(wc, str):
        range_match = re.match(
            r'(\d+)\s*[-\u2013]\s*(\d+)(?![\d.])',
            wc.strip('` \t'),
        )
        if range_match:
            constants['WORD_COUNT_MIN'] = int(range_match.group(1))
            constants['WORD_COUNT_MAX'] = int(range_match.group(2))

    return constants


def _load_default_constants() -> dict:
    """Return the format constants, preferring the sibling CONSTANTS.md.

    If no CONSTANTS.md sits next to this module, a built-in set of default
    values is returned instead.
    """
    md_file = Path(__file__).parent / 'CONSTANTS.md'
    if not md_file.exists():
        # No markdown source available: use the hardcoded defaults.
        return dict(
            WORD_COUNT_MIN=450,
            WORD_COUNT_MAX=500,
            DIALOGUE_MAX_PERCENT=40,
            MAX_EXCHANGES=8,
            MAX_ACTION_BLOCK_LINES=4,
            GENERATION_BATCH_SIZE=5,
        )
    return _parse_constants_from_md(md_file)


# =============================================================================
# KILL BOX SECTION PATTERNS
# =============================================================================

# (start, end, title) of every Kill Box beat, in running order.
_KILL_BOX_TIMELINE = (
    ('00:00', '00:05', 'THE HOOK'),
    ('00:05', '00:15', 'THE SETUP'),
    ('00:15', '00:40', 'THE ESCALATION'),
    ('00:40', '00:70', 'THE TURN'),
    ('00:70', '00:90', 'THE CLIFFHANGER'),
)

# One (header regex, section name, display timestamp) tuple per beat.
KILL_BOX_SECTIONS = [
    (rf'#\s*\[{start}\s*-\s*{end}\].*{title}', title, f'[{start} - {end}]')
    for start, end, title in _KILL_BOX_TIMELINE
]


# =============================================================================
# PARSING HELPERS
# =============================================================================

def _is_character_cue(line: str) -> bool:
    """Detect if a line is a character cue (speaker name) in Fountain format.

    A line counts as a cue when, after stripping surrounding whitespace, it:

    * is non-empty, entirely upper-case and at most 30 characters long;
    * contains no ``:`` and does not start with ``.``;
    * is not a shot/transition keyword (ECU, SFX, INSERT, ...), does not
      start with ``PULL``, and is not an ``INT.``/``EXT.`` scene heading;
    * is not wholly wrapped in parentheses.

    Args:
        line: A single line of episode text.

    Returns:
        True if the line looks like a speaker cue, False otherwise.
    """
    stripped = line.strip()
    if not stripped or not stripped.isupper() or len(stripped) > 30:
        return False
    if ':' in stripped:
        return False
    if stripped.startswith(('.', 'PULL', 'INT.', 'EXT.')):
        return False
    # An upper-case direction such as "(BEAT)" is a parenthetical, not a
    # speaker. Treating it as a cue would leave an empty speaker name once
    # the parenthetical is removed from the cue text.
    if stripped.startswith('(') and stripped.endswith(')'):
        return False
    skip_words = {
        'ECU', 'CU', 'MCU', 'MS', 'WS', 'POV', 'SFX', 'VFX',
        'INSERT', 'CONTINUOUS', 'LATER', 'PULL BACK', 'CLICK',
    }
    return stripped not in skip_words


def _is_parenthetical(line: str) -> bool:
    """Return True when the trimmed line opens with '(' and closes with ')'."""
    text = line.strip()
    opens = text[:1] == '('
    closes = text[-1:] == ')'
    return opens and closes


def _count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*."""
    return sum(1 for _ in text.split())


def _parse_dialogue_blocks(text: str) -> list:
    """Split *text* into dialogue blocks.

    A block opens at a character cue and closes at the next blank line or the
    next cue. Parenthetical lines inside a block are skipped, and any
    parenthetical on the cue line is removed from the speaker name.

    Returns:
        List of ``(character, dialogue_text)`` tuples, where the dialogue
        lines of a block are joined with newlines.
    """
    parsed = []
    speaker = None
    speech = []
    collecting = False

    for raw in text.split('\n'):
        entry = raw.strip()

        if _is_character_cue(entry):
            # A new cue closes whatever block was still open.
            if speaker and speech:
                parsed.append((speaker, '\n'.join(speech)))
            speaker = re.sub(r'\s*\([^)]+\)\s*', '', entry).strip()
            speech = []
            collecting = True
            continue

        # Outside dialogue nothing is collected; parentheticals are dropped.
        if not collecting or _is_parenthetical(entry):
            continue

        if entry:
            speech.append(entry)
            continue

        # A blank line ends the current speech.
        if speaker and speech:
            parsed.append((speaker, '\n'.join(speech)))
            speaker = None
            speech = []
        collecting = False

    # Flush a block that runs to the end of the text.
    if speaker and speech:
        parsed.append((speaker, '\n'.join(speech)))

    return parsed


def _count_dialogue_words(blocks: list) -> int:
    """Add up the word counts of the dialogue text in every parsed block."""
    total = 0
    for _speaker, speech in blocks:
        total += _count_words(speech)
    return total


def _count_exchanges(blocks: list) -> int:
    """Return the number of exchanges; every parsed block counts as one."""
    return sum(1 for _ in blocks)


# =============================================================================
# VALIDATION FUNCTIONS
# =============================================================================

def _validate_kill_box_sections(text: str) -> dict:
    """
    Validate Kill Box section structure: presence, order, and non-empty content.

    Each entry of KILL_BOX_SECTIONS is searched for (case-insensitively) in
    *text*. Sections that are found must appear in the same order as in
    KILL_BOX_SECTIONS, and each must have some non-whitespace content between
    its header line and the next section header, the next ``---`` separator
    line, or the end of the text.

    Args:
        text: Full episode text.

    Returns:
        Dict with ``sections_found`` (names), ``sections_missing`` (names),
        ``order_valid`` (bool) and ``issues`` (list of messages).
    """
    sections_found = []
    sections_missing = []
    located = []  # (name, timestamp, header match) for every section present

    # Search each header once and reuse the match for both checks below.
    for pattern, name, timestamp in KILL_BOX_SECTIONS:
        header = re.search(pattern, text, re.IGNORECASE)
        if header is None:
            sections_missing.append(name)
            continue
        sections_found.append(name)
        located.append((name, timestamp, header))

    # Check order: header offsets must be strictly increasing.
    positions = [header.start() for _, _, header in located]
    order_valid = all(a < b for a, b in zip(positions, positions[1:]))
    issues = []
    if not order_valid:
        issues.append("Kill Box sections are out of order")

    # Check non-empty content between sections.
    for name, timestamp, header in located:
        # Start after the END OF THE HEADER LINE, not after the section name:
        # trailing header text (e.g. "THE HOOK (silent)") is not content and
        # must not hide an otherwise empty section.
        line_end = text.find('\n', header.end())
        body = text[line_end + 1:] if line_end != -1 else ''

        # The section ends at the nearest following header or '---' line.
        cutoffs = []
        for next_pattern, _, _ in KILL_BOX_SECTIONS:
            next_header = re.search(next_pattern, body, re.IGNORECASE)
            if next_header:
                cutoffs.append(next_header.start())
        separator = re.search(r'^---\s*$', body, re.MULTILINE)
        if separator:
            cutoffs.append(separator.start())
        if cutoffs:
            body = body[:min(cutoffs)]

        if not body.strip():
            issues.append(f"{name} section ({timestamp}) has no content")

    return {
        'sections_found': sections_found,
        'sections_missing': sections_missing,
        'order_valid': order_valid,
        'issues': issues,
    }


def _check_formatting(content: str) -> list:
    """Collect V12 formatting problems found in *content*.

    Reports a missing ``[[EPISODE <number>`` header and every Kill Box
    section header from KILL_BOX_SECTIONS that cannot be found.

    Returns:
        List of issue messages (empty when nothing is wrong).
    """
    problems = []

    episode_header = re.search(r'\[\[EPISODE\s+\d+', content, re.IGNORECASE)
    if episode_header is None:
        problems.append("Missing episode header [[EPISODE X: TITLE]]")

    problems.extend(
        f"Missing Kill Box section: {title}"
        for regex, title, _ in KILL_BOX_SECTIONS
        if re.search(regex, content, re.IGNORECASE) is None
    )

    return problems


def _check_hook_silent(content: str) -> tuple:
    """Report whether THE HOOK section is free of dialogue.

    The hook body runs from the line after the HOOK header up to the next
    timestamped header or the end of the text.

    Returns:
        ``(is_silent, detail)``. When no HOOK section is found the hook is
        reported as silent; otherwise the first character cue found in the
        hook body makes it non-silent and is named in *detail*.
    """
    found = re.search(
        r'#\s*\[[\d:]+\s*-\s*[\d:]+\]\s*THE HOOK.*?\n(.*?)(?=#\s*\[[\d:]+|$)',
        content, re.DOTALL | re.IGNORECASE
    )
    if found is None:
        return True, "No HOOK section found"

    trimmed_lines = (raw.strip() for raw in found.group(1).split('\n'))
    first_cue = next((ln for ln in trimmed_lines if _is_character_cue(ln)), None)
    if first_cue is None:
        return True, "Hook is silent"
    return False, f"Dialogue hook: {first_cue}"


def _check_meta_references(content: str) -> list:
    """Find "Episode <number>" mentions inside the Kill Box section bodies.

    For each of the five section names, the body after the first matching
    timestamped header (up to the next timestamped header, a ``---`` or the
    end of the text) is scanned case-insensitively.

    Returns:
        One issue message per section that contains such mentions.
    """
    found_issues = []

    for title in ('THE HOOK', 'THE SETUP', 'THE ESCALATION', 'THE TURN', 'THE CLIFFHANGER'):
        section_re = rf'#\s*\[[\d:]+\s*-\s*[\d:]+\]\s*{title}.*?\n(.*?)(?=#\s*\[[\d:]+|---|\Z)'
        section = re.search(section_re, content, re.DOTALL | re.IGNORECASE)
        if section is None:
            continue
        mentions = re.findall(r'\bEpisode\s+\d+\b', section.group(1), re.IGNORECASE)
        if not mentions:
            continue
        found_issues.append(f"Meta-reference in {title}: {mentions}")

    return found_issues


# =============================================================================
# PUBLIC API
# =============================================================================

def _longest_action_block(content: str) -> int:
    """Return the line count of the longest action block in *content*.

    Best-effort scan: an action block is a run of non-blank lines outside
    dialogue. Lines starting with ``#`` or ``---`` are neither counted nor
    treated as a break. A blank line or a character cue ends the run, and
    lines following a cue are skipped until the next blank line.
    """
    longest = 0
    run = 0  # lines in the action block currently being counted
    in_dialogue = False
    for line in content.split('\n'):
        stripped = line.strip()
        if _is_character_cue(stripped):
            longest = max(longest, run)
            run = 0
            in_dialogue = True
        elif in_dialogue:
            if not stripped:
                in_dialogue = False
        elif not stripped:
            longest = max(longest, run)
            run = 0
        elif not stripped.startswith(('#', '---')):
            run += 1
    return max(longest, run)


def validate_episode(episode_path: str, constants: dict = None) -> dict:
    """
    Validate a single Kill Box episode.

    Errors (which make the episode invalid) cover word count, dialogue
    percentage, exchange count, and missing or out-of-order Kill Box
    sections. Warnings cover empty sections, a missing episode header,
    dialogue in the hook, "Episode N" meta-references and over-long action
    blocks.

    Args:
        episode_path: Path to the episode file.
        constants: Optional dict of constants. If None, loads from CONSTANTS.md.

    Returns:
        {valid: bool, errors: [], warnings: [], metrics: {}}
        A path that is not a regular file, or a file that cannot be read or
        decoded as UTF-8, yields ``valid: False`` with a single error and
        empty metrics instead of raising.
    """
    filepath = Path(episode_path)

    # is_file() (rather than exists()) also rejects directories, which would
    # otherwise make read_text() raise.
    if not filepath.is_file():
        return {
            'valid': False,
            'errors': [f"File not found: {filepath}"],
            'warnings': [],
            'metrics': {},
        }

    try:
        content = filepath.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        # Report unreadable input like any other failure so that a single
        # bad file cannot abort a whole batch.
        return {
            'valid': False,
            'errors': [f"Could not read file: {filepath} ({exc})"],
            'warnings': [],
            'metrics': {},
        }

    if constants is None:
        constants = _load_default_constants()

    word_min = constants.get('WORD_COUNT_MIN', 450)
    word_max = constants.get('WORD_COUNT_MAX', 500)
    dialogue_max_pct = constants.get('DIALOGUE_MAX_PERCENT', 40)
    max_exchanges = constants.get('MAX_EXCHANGES', 8)
    max_action_lines = constants.get('MAX_ACTION_BLOCK_LINES', 4)

    errors = []
    warnings = []
    metrics = {}

    # Word count (over the whole file text)
    total_words = _count_words(content)
    metrics['word_count'] = total_words

    if total_words < word_min:
        errors.append(f"Word count too LOW: {total_words} (min: {word_min})")
    elif total_words > word_max:
        errors.append(f"Word count too HIGH: {total_words} (max: {word_max})")

    # Dialogue
    dialogue_blocks = _parse_dialogue_blocks(content)
    dialogue_words = _count_dialogue_words(dialogue_blocks)
    exchanges = _count_exchanges(dialogue_blocks)
    dialogue_pct = (dialogue_words / total_words * 100) if total_words > 0 else 0

    metrics['dialogue_word_count'] = dialogue_words
    metrics['dialogue_percent'] = round(dialogue_pct, 1)
    metrics['exchange_count'] = exchanges
    # Sorted so the metric is deterministic from run to run.
    metrics['characters'] = sorted({speaker for speaker, _ in dialogue_blocks})

    if dialogue_pct > dialogue_max_pct:
        errors.append(f"Dialogue too HIGH: {dialogue_pct:.1f}% (max: {dialogue_max_pct}%)")
    if exchanges > max_exchanges:
        errors.append(f"Exchanges too HIGH: {exchanges} (max: {max_exchanges})")

    # Kill Box section structure
    kb = _validate_kill_box_sections(content)
    metrics['kill_box_sections_found'] = kb['sections_found']
    metrics['kill_box_sections_missing'] = kb['sections_missing']
    metrics['kill_box_order_valid'] = kb['order_valid']
    metrics['kill_box_issues'] = kb['issues']

    if kb['sections_missing']:
        errors.append(f"Missing Kill Box sections: {', '.join(kb['sections_missing'])}")
    if not kb['order_valid'] and len(kb['sections_found']) > 1:
        errors.append("Kill Box sections are out of order")
    # Empty sections are only warnings; ordering was reported as an error above.
    warnings.extend(issue for issue in kb['issues'] if 'no content' in issue)

    # Formatting checks: only the header issue is surfaced here, because
    # missing sections are already captured by the structure check above.
    warnings.extend(
        issue for issue in _check_formatting(content)
        if 'Missing episode header' in issue
    )

    # Hook check
    hook_silent, hook_detail = _check_hook_silent(content)
    metrics['hook_silent'] = hook_silent
    if not hook_silent:
        warnings.append(f"Hook has dialogue: {hook_detail}")

    # Meta-reference check
    warnings.extend(_check_meta_references(content))

    # Action block analysis (best-effort)
    longest_action = _longest_action_block(content)
    metrics['longest_action_block'] = longest_action
    if longest_action > max_action_lines:
        warnings.append(
            f"Action block too long: {longest_action} lines (max: {max_action_lines})"
        )

    return {
        'valid': len(errors) == 0,
        'errors': errors,
        'warnings': warnings,
        'metrics': metrics,
    }


def validate_batch(episode_paths: list, constants: dict = None) -> dict:
    """
    Validate several Kill Box episodes against one shared set of constants.

    Args:
        episode_paths: List of file paths to episode files.
        constants: Optional dict of constants. If None, loads from CONSTANTS.md.

    Returns:
        {valid: bool, episode_results: [...]}, where each entry is the
        result of validate_episode plus a 'file' key holding the path, and
        'valid' is True only when every episode is valid.
    """
    # Resolve the constants once so every episode is checked against the same set.
    shared = _load_default_constants() if constants is None else constants

    episode_results = [
        {**validate_episode(str(path), constants=shared), 'file': str(path)}
        for path in episode_paths
    ]

    return {
        'valid': all(outcome['valid'] for outcome in episode_results),
        'episode_results': episode_results,
    }