#!/usr/bin/python3
"""
Pre-Generation Gate - Required Files Validation

This script validates that ALL required files exist before episode generation
can begin. This is a HARD GATE - generation CANNOT proceed if any required
file is missing.

Required files:
1. treatment.md           - MASTER generation input (prose, THE MOMENT, cliffhangers)
2. ORCHESTRATION.md       - Project-specific rules, batch schedule
3. bible/characters.md    - Voice patterns, behavioral DNA
4. bible/series_bible.md  - World, characters, thematic core
5. state/current_state.json - Position tracking

Usage: python3 validate_pre_generation.py <project_path>
Example: python3 validate_pre_generation.py ./leviathan

Returns:
- Exit code 0: All required files exist, generation may proceed
- Exit code 1: One or more required files missing, generation blocked
- Exit code 2: Configuration/path error
"""

import hashlib
import json
import re
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent))  # CLAUDE_PROJECTS, for recoil.*
from recoil.core.paths import ProjectPaths

# Required files for generation (relative to project root).
#
# Each entry is a dict with these keys:
#   path          - location relative to the project root; check_file() joins
#                   it onto the project path
#   name          - short label printed in the gate report
#   description   - printed as "Purpose:" when the file is missing or too small
#   how_to_create - printed as "Create with:" when the file is missing or too small
#   critical      - True for every entry below
#                   NOTE(review): no code visible in this file reads this key;
#                   confirm whether anything else depends on it
REQUIRED_FILES = [
    {
        'path': 'treatment.md',
        'name': 'Treatment',
        'description': 'MASTER generation input - contains prose paragraphs, THE MOMENT, and cliffhanger images for each episode',
        'how_to_create': 'Run: /treatment [project]',
        'critical': True,
    },
    {
        'path': 'ORCHESTRATION.md',
        'name': 'Orchestration',
        'description': 'Project-specific rules, batch schedule, behavioral DNA summary',
        'how_to_create': 'Created during promotion from development',
        'critical': True,
    },
    {
        'path': 'bible/characters.md',
        'name': 'Characters',
        'description': 'Voice patterns, behavioral DNA, humor types, anti-patterns',
        'how_to_create': 'Run: /develop [project] or create manually',
        'critical': True,
    },
    {
        'path': 'bible/series_bible.md',
        'name': 'Series Bible',
        'description': 'World, characters, thematic core, visual palette',
        'how_to_create': 'Created during development phase',
        'critical': True,
    },
    {
        'path': 'state/current_state.json',
        'name': 'State File',
        'description': 'Position tracking, character states, thread status',
        'how_to_create': 'Created during promotion (initialized state)',
        'critical': True,
    },
]

# Optional but recommended files.
#
# Same 'path' / 'name' / 'description' keys as REQUIRED_FILES. main() lists any
# of these that do not exist under a "RECOMMENDED (not blocking)" heading,
# printing only 'name' and 'path'; their absence does not affect the exit code.
RECOMMENDED_FILES = [
    {
        'path': 'bible/episode_arc.md',
        'name': 'Episode Arc',
        'description': 'Episode-by-episode outline (reference, not used directly)',
    },
    {
        'path': 'bible/relationship_map.md',
        'name': 'Relationship Map',
        'description': 'Relationship milestones and earning schedule',
    },
    {
        'path': 'bible/plant_payoff_plan.md',
        'name': 'Plant/Payoff Plan',
        'description': 'Thread tracking across episodes',
    },
]


def check_file(project_path, file_info):
    """Report presence and size of one project file.

    Args:
        project_path: Project root as a ``pathlib.Path``.
        file_info: Entry with at least ``'path'`` (relative to the project
            root) and ``'name'`` keys.

    Returns:
        A dict with ``path``, ``name``, ``exists``, ``has_content`` and
        ``size`` (bytes; 0 when the path does not exist). ``has_content`` is
        True only when the size is strictly greater than 100 bytes.
    """
    target = project_path / file_info['path']
    present = target.exists()
    byte_count = target.stat().st_size if present else 0

    return {
        'path': file_info['path'],
        'name': file_info['name'],
        'exists': present,
        # Anything of 100 bytes or fewer is treated as effectively empty.
        'has_content': byte_count > 100,
        'size': byte_count,
    }


def validate_treatment_structure(project_path):
    """Run lightweight structural checks on ``treatment.md``.

    Counts the distinct episode numbers that follow a ``##`` "Episode N"
    marker (case-insensitive) and the occurrences of the phrase "the moment"
    (case-insensitive), then flags the file when either count looks too low.

    Args:
        project_path: Project root as a ``pathlib.Path``.

    Returns:
        ``None`` when ``treatment.md`` does not exist. Otherwise a dict with
        ``episode_count`` (distinct episode numbers), ``moment_count``,
        ``issues`` (list of human-readable strings) and ``valid`` (True when
        ``issues`` is empty).
    """
    treatment_path = project_path / 'treatment.md'

    if not treatment_path.exists():
        return None

    # Decode explicitly as UTF-8 instead of the locale default, and replace
    # undecodable bytes: the markers counted below are plain ASCII, so a stray
    # bad byte should not abort the whole gate with a UnicodeDecodeError.
    content = treatment_path.read_text(encoding='utf-8', errors='replace')

    issues = []

    # Distinct episode numbers only: a repeated heading is counted once.
    episode_numbers = {
        int(m.group(1))
        for m in re.finditer(r'##\s*Episode\s*(\d+)', content, re.IGNORECASE)
    }
    episode_count = len(episode_numbers)

    if episode_count < 60:
        issues.append(f"Only {episode_count} episodes found (need 60)")

    # Check for THE MOMENT markers
    moment_count = content.lower().count('the moment')
    if moment_count < 30:  # Should have at least 30 for 60 episodes
        issues.append(f"Only {moment_count} THE MOMENT markers found (expected ~60)")

    return {
        'episode_count': episode_count,
        'moment_count': moment_count,
        'issues': issues,
        'valid': not issues,
    }


# ---------------------------------------------------------------------------
# Structural Gates S1-S5 (Development Phase — run once)
# LLM-based evaluation of treatment/blueprint quality.
# Results cached to state/structural_gate_results.json.
# ---------------------------------------------------------------------------

try:
    # engine_constants is looked up in a sibling "tools" directory one level
    # above this script's directory.
    sys.path.insert(0, str(Path(__file__).resolve().parent.parent / 'tools'))
    from engine_constants import (
        ANTHROPIC_SONNET, ANTHROPIC_OPUS,
        get_anthropic_client, call_anthropic, parse_llm_field,
    )
except ImportError:
    # Fallbacks used when engine_constants cannot be imported. With these in
    # place get_anthropic_client() yields None, which makes
    # run_structural_gates() report the gates as skipped.
    ANTHROPIC_SONNET = "claude-sonnet-4-6"
    ANTHROPIC_OPUS = "claude-opus-4-6"

    def get_anthropic_client():
        """Fallback stub: no client is available."""
        return None

    def call_anthropic(client, model, prompt, max_tokens=200):
        """Fallback stub: performs no call and returns None."""
        return None

    def parse_llm_field(result, field, expected=None):
        """Fallback stub: never extracts a field."""
        return None


def _file_hash(filepath):
    """Return the MD5 hex digest of a file's bytes, or None if it is absent.

    The digest is used only as a change detector for cache invalidation.
    """
    if filepath.exists():
        digest = hashlib.md5()
        digest.update(filepath.read_bytes())
        return digest.hexdigest()
    return None


def _load_cache(project_path):
    """Load cached structural gate results, if a usable cache file exists.

    Args:
        project_path: Project root; passed to ``ProjectPaths.from_root`` to
            locate the state directory.

    Returns:
        The decoded cache dict, or ``None`` when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    cache_path = ProjectPaths.from_root(project_path).state_dir / "structural_gate_results.json"
    try:
        # _save_cache writes json.dumps() output with default settings, which
        # is pure ASCII, so decoding as UTF-8 is always compatible with it.
        data = json.loads(cache_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError from a corrupted file.
        return None
    # Callers use .get() on the result, so anything but a dict is unusable.
    return data if isinstance(data, dict) else None


def _save_cache(project_path, results, treatment_hash):
    """Write structural gate results, tagged with the treatment hash, to disk.

    The file is ``structural_gate_results.json`` inside the project's state
    directory (as resolved by ``ProjectPaths``); its parent directory is
    created when missing.
    """
    state_dir = ProjectPaths.from_root(project_path).state_dir
    target = state_dir / "structural_gate_results.json"
    target.parent.mkdir(parents=True, exist_ok=True)

    payload = json.dumps(
        {"treatment_hash": treatment_hash, "results": results},
        indent=2,
    )
    target.write_text(payload)


def _extract_climax_section(treatment_text):
    """Extract the climax/finale portion of the treatment.

    The section starts at the first line that mentions "episode 55" through
    "episode 60" (case-insensitive substring match) and runs to the end of the
    text. When no such line exists, the last 20% of the lines is returned.

    Args:
        treatment_text: Full treatment text.

    Returns:
        The extracted section as a single newline-joined string.
    """
    lines = treatment_text.split('\n')
    markers = tuple(f'episode {n}' for n in range(55, 61))

    climax_start = None
    for i, line in enumerate(lines):
        lowered = line.lower()
        if any(marker in lowered for marker in markers):
            climax_start = i
            break

    # Compare against None explicitly: index 0 is a valid match position and
    # must not be mistaken for "no marker found".
    if climax_start is not None:
        return '\n'.join(lines[climax_start:])

    # Fallback: last 20% of treatment
    start = int(len(lines) * 0.8)
    return '\n'.join(lines[start:])


def _extract_all_is_lost_section(treatment_text):
    """Extract the "All Is Lost" stretch of the treatment.

    Capture begins at the first line mentioning "episode 42" through
    "episode 47" (case-insensitive substring match) and stops at the first
    later line that starts with ``## Episode`` and carries a number greater
    than 48. If nothing is captured, the final 3000 characters of the text
    are returned instead.
    """
    start_markers = tuple(f'episode {n}' for n in range(42, 48))
    collected = []
    started = False

    for raw in treatment_text.split('\n'):
        lowered = raw.lower()
        if any(marker in lowered for marker in start_markers):
            started = True
        elif started and raw.startswith('## Episode'):
            found = re.search(r'Episode\s*(\d+)', raw)
            if found is not None and int(found.group(1)) > 48:
                break
        if started:
            collected.append(raw)

    if collected:
        return '\n'.join(collected)
    return treatment_text[-3000:]


def check_s1_crisis_dilemma(client, treatment_text, series_bible_text):
    """
    S1: Crisis Dilemma Adversarial Test (McKee)

    Runs three sequential requests against the ANTHROPIC_OPUS model: Advocate A
    argues the climax is a genuine dilemma, Advocate B argues it is a false
    choice, and a Judge rules on the two arguments.

    Returns a dict with "passed", "verdict" and "reason" (plus truncated
    "advocate_a"/"advocate_b" on a completed run). The gate is fail-open: an
    unparsed verdict is reported as INCONCLUSIVE with passed=True, and any
    exception raised while calling the client yields verdict SKIPPED with
    passed=True.
    """
    climax_excerpt = _extract_climax_section(treatment_text)[:4000]
    bible_excerpt = series_bible_text[:3000]
    context = f"SERIES BIBLE:\n{bible_excerpt}\n\nCLIMAX SECTION:\n{climax_excerpt}"

    def _ask_opus(prompt_text):
        # One single-turn request; returns the stripped text of the first
        # content item in the response.
        reply = client.messages.create(
            model=ANTHROPIC_OPUS, max_tokens=300,
            messages=[{"role": "user", "content": prompt_text}],
        )
        return reply.content[0].text.strip()

    # Advocate A: Argue it IS a genuine dilemma
    advocate_a_prompt = f"""You are Advocate A in a structural review of a screenplay's climax.

Your job: Argue that the crisis IS a genuine dilemma — a choice between two irreconcilable goods or the lesser of two evils. Both options must cost the protagonist something real.

{context}

Make your strongest case in 3-4 sentences. What are the two competing goods? Why can't the protagonist have both?"""

    # Advocate B: Argue it's a FALSE choice
    advocate_b_prompt = f"""You are Advocate B in a structural review of a screenplay's climax.

Your job: Argue that the crisis is a FALSE choice — one option is clearly better, the protagonist wouldn't genuinely consider the alternative, or the dilemma is manufactured rather than organic.

{context}

Make your strongest case in 3-4 sentences. Why is this not a real dilemma?"""

    try:
        arg_a = _ask_opus(advocate_a_prompt)
        arg_b = _ask_opus(advocate_b_prompt)

        # Judge decides
        judge_prompt = f"""You are the Judge in a structural review of a screenplay's climax crisis.

Advocate A argues the crisis IS a genuine dilemma:
{arg_a}

Advocate B argues the crisis is a FALSE choice:
{arg_b}

Decide: Is this crisis a genuine dilemma or a false choice?

Output EXACTLY:
VERDICT: [GENUINE or FALSE]
REASON: [2-3 sentences explaining your ruling]"""

        ruling = _ask_opus(judge_prompt)

        if parse_llm_field:
            verdict = parse_llm_field(ruling, "VERDICT", ["GENUINE", "FALSE"])
            reason = (parse_llm_field(ruling, "REASON") or ruling)[:200]
        else:
            verdict = None
            reason = ruling[:200]

        # No parsed verdict means the gate passes by default.
        if verdict:
            passed = verdict == "GENUINE"
        else:
            passed = True

        return {
            "passed": passed,
            "verdict": verdict or "INCONCLUSIVE",
            "reason": reason,
            "advocate_a": arg_a[:150],
            "advocate_b": arg_b[:150],
        }
    except Exception as e:
        return {"passed": True, "verdict": "SKIPPED", "reason": str(e)[:100]}


def check_s2_integration(client, treatment_text, series_bible_text):
    """
    S2: Integration Test (Aronson)

    Asks the ANTHROPIC_SONNET model whether the A-story climax can succeed
    without the B-story lesson. A parsed "YES" fails the gate, a parsed "NO"
    passes it.

    Returns a dict with "passed" and "reason" (plus "can_work_without" on a
    completed run). The gate is fail-open: an unparsed answer or any exception
    from the client call yields passed=True.
    """
    climax = _extract_climax_section(treatment_text)

    prompt = f"""You are evaluating structural integration in a screenplay treatment.

RULE (Aronson): The physical plot (A-Story) can only be resolved by applying the emotional truth (B-Story). If the A-story climax could work without the B-story lesson, the parallel structure has failed.

SERIES CONTEXT:
{series_bible_text[:2000]}

CLIMAX SECTION:
{climax[:4000]}

Answer:
1. What is the A-story (physical/external plot)?
2. What is the B-story (emotional/internal journey)?
3. Can the A-story climax succeed WITHOUT the B-story lesson? (YES or NO)

Output EXACTLY:
A_STORY: [one sentence]
B_STORY: [one sentence]
CAN_WORK_WITHOUT: [YES or NO]
REASON: [one sentence]"""

    try:
        reply = client.messages.create(
            model=ANTHROPIC_SONNET, max_tokens=300,
            messages=[{"role": "user", "content": prompt}],
        )
        answer = reply.content[0].text.strip()

        if parse_llm_field:
            can_work_raw = parse_llm_field(answer, "CAN_WORK_WITHOUT", ["YES", "NO"])
        else:
            can_work_raw = None

        if can_work_raw is None:
            can_work_without = None
        else:
            can_work_without = can_work_raw == "YES"

        if parse_llm_field:
            reason = (parse_llm_field(answer, "REASON") or answer)[:200]
        else:
            reason = answer[:200]

        # If A can work without B, integration FAILS; no answer passes.
        if can_work_without is None:
            passed = True
        else:
            passed = not can_work_without

        return {
            "passed": passed,
            "can_work_without": can_work_without,
            "reason": reason,
        }
    except Exception as e:
        return {"passed": True, "reason": f"SKIPPED: {e}"}


def check_s4_shadow_mirror(client, treatment_text, characters_text):
    """
    S4: Shadow Mirror Quality (Vogler)
    Rubric 1-10, min 7: Does antagonist attack protagonist's specific paradox?

    Sends the first 3000 characters of the character notes and the first 5000
    characters of the treatment to the ANTHROPIC_SONNET model and parses the
    SCORE line of the reply.

    Returns a dict with "passed", "score" (int or None) and "reason". The gate
    is fail-open: when no score can be parsed, or when the client call raises,
    passed is True.
    """
    prompt = f"""You are evaluating antagonist quality in a screenplay treatment.

RULE (Vogler): A strong antagonist specifically targets the protagonist's psychological weakness — their internal paradox. A generic "evil" or "misunderstood" antagonist is a structural failure. The shadow must mirror and attack the hero's specific flaw.

CHARACTER DETAILS:
{characters_text[:3000]}

TREATMENT (key sections):
{treatment_text[:5000]}

Score the antagonist on a 1-10 scale:
- 1-3: Generic antagonist (pure evil or vague motivation)
- 4-6: Has motivation but doesn't target protagonist's specific weakness
- 7-8: Specifically attacks the protagonist's internal contradiction
- 9-10: The antagonist IS what the protagonist would become without transformation

Output EXACTLY:
SCORE: [1-10]
ANTAGONIST: [name or description]
ATTACKS: [what specific weakness they target]
REASON: [one sentence]"""

    try:
        resp = client.messages.create(
            model=ANTHROPIC_SONNET, max_tokens=300,
            messages=[{"role": "user", "content": prompt}],
        )
        result = resp.content[0].text.strip()

        score = None
        score_text = parse_llm_field(result, "SCORE") if parse_llm_field else None
        if score_text is None and "SCORE:" in result:
            score_text = result.split("SCORE:")[1].split("\n")[0].strip()
        if score_text:
            # Take the first integer on the score line so decorated answers
            # such as "8/10", "[8]", "**8**" or "8 (strong)" still yield a
            # score instead of silently passing the gate as "unparsed".
            number = re.search(r'\d+', str(score_text))
            if number is not None:
                score = int(number.group())

        reason = (parse_llm_field(result, "REASON") or result)[:200] if parse_llm_field else result[:200]

        return {
            "passed": score >= 7 if score is not None else True,
            "score": score,
            "reason": reason,
        }
    except Exception as e:
        return {"passed": True, "score": None, "reason": f"SKIPPED: {e}"}


def check_s5_dig_deep_down(client, treatment_text):
    """
    S5: Dig Deep Down Presence (Snyder)

    Asks the ANTHROPIC_SONNET model which internal resource the protagonist
    draws on at the All Is Lost moment and where it was planted. The gate
    passes when the reply's SHIFTS field is YES and its PLANTED_AT field does
    not contain "MISSING".

    Returns a dict with "passed" and "reason" (plus "has_mechanism" and
    "planted_missing" on a completed run). The gate is fail-open: an unparsed
    SHIFTS field or any exception from the client call yields passed=True.
    """
    all_is_lost_excerpt = _extract_all_is_lost_section(treatment_text)

    prompt = f"""You are evaluating transformation mechanism in a screenplay treatment.

RULE (Snyder "Dig Deep Down"): At the All Is Lost moment, the protagonist must access an internal resource — something they couldn't use before because of their flaw. This resource must be PLANTED earlier in the story. The transformation is not spontaneous; it's earned.

ALL IS LOST SECTION (Episodes ~43-47):
{all_is_lost_excerpt[:4000]}

EARLIER TREATMENT (for checking plants):
{treatment_text[:4000]}

Answer:
1. What is the protagonist's All Is Lost moment?
2. What internal resource do they access to rally?
3. Where was this resource planted earlier? (specific episode or section)

Output EXACTLY:
ALL_IS_LOST: [one sentence describing the moment]
RESOURCE: [what internal resource they access]
PLANTED_AT: [where it was planted, or MISSING if not found]
SHIFTS: [YES if transformation mechanism is present, NO if missing]"""

    try:
        reply = client.messages.create(
            model=ANTHROPIC_SONNET, max_tokens=300,
            messages=[{"role": "user", "content": prompt}],
        )
        result = reply.content[0].text.strip()

        if parse_llm_field:
            shifts_raw = parse_llm_field(result, "SHIFTS", ["YES", "NO"])
            planted_raw = parse_llm_field(result, "PLANTED_AT")
        else:
            shifts_raw = None
            planted_raw = None

        has_shift = None if shifts_raw is None else (shifts_raw == "YES")

        # Prefer the parsed PLANTED_AT value; otherwise inspect the raw line
        # following the last "PLANTED_AT:" label, if there is one.
        if planted_raw:
            planted_missing = "MISSING" in planted_raw.upper()
        elif "PLANTED_AT:" in result:
            planted_line = result.split("PLANTED_AT:")[-1].split("\n")[0]
            planted_missing = "MISSING" in planted_line.upper()
        else:
            planted_missing = False

        if has_shift is None:
            passed = True
        else:
            passed = has_shift and not planted_missing

        return {
            "passed": passed,
            "has_mechanism": has_shift,
            "planted_missing": planted_missing,
            "reason": result[:300],
        }
    except Exception as e:
        return {"passed": True, "reason": f"SKIPPED: {e}"}


def _gate_was_skipped(gate_result):
    """Return True when a gate result is a fail-open placeholder for an error.

    The S1 check marks an errored run with verdict "SKIPPED"; the S2/S4/S5
    checks mark it with a reason that starts with "SKIPPED:".
    """
    if gate_result.get("verdict") == "SKIPPED":
        return True
    return str(gate_result.get("reason", "")).startswith("SKIPPED:")


def run_structural_gates(project_path):
    """
    Run the structural gates (S1, S2, S4, S5) on the treatment.

    Results are cached together with the MD5 of treatment.md and reused while
    that hash is unchanged. A run in which any gate could not be evaluated
    (its check returned a SKIPPED placeholder) is not cached, and a cached run
    containing such a placeholder is ignored, so a transient API failure is
    retried on the next invocation instead of being remembered as a pass.

    Args:
        project_path: Project root as a ``pathlib.Path``.

    Returns:
        A tuple ``(all_passed, results)``:
        - when no client is available or treatment.md is missing:
          ``(True, {"skipped": True, "reason": ...})``;
        - on a cache hit: the cached per-gate dicts plus ``"cached": True``;
        - otherwise: a dict keyed by gate name, in the fixed order
          s1_crisis_dilemma, s2_integration, s4_shadow_mirror,
          s5_dig_deep_down.
    """
    client = get_anthropic_client()
    if client is None:
        return True, {"skipped": True, "reason": "No ANTHROPIC_API_KEY — structural gates skipped"}

    treatment_path = project_path / "treatment.md"
    if not treatment_path.exists():
        return True, {"skipped": True, "reason": "No treatment.md — structural gates skipped"}

    # Check cache. Be defensive about its shape: it is read back from disk.
    current_hash = _file_hash(treatment_path)
    cache = _load_cache(project_path)
    if isinstance(cache, dict) and cache.get("treatment_hash") == current_hash:
        cached_results = cache.get("results", {})
        usable = isinstance(cached_results, dict) and all(
            isinstance(g, dict) for g in cached_results.values()
        )
        if usable and not any(_gate_was_skipped(g) for g in cached_results.values()):
            all_passed = all(g.get("passed", True) for g in cached_results.values())
            return all_passed, {**cached_results, "cached": True}

    # Load content. Decode as UTF-8 regardless of locale; undecodable bytes
    # are replaced rather than aborting the run.
    treatment_text = treatment_path.read_text(encoding="utf-8", errors="replace")

    characters_path = project_path / "bible" / "characters.md"
    characters_text = (
        characters_path.read_text(encoding="utf-8", errors="replace")
        if characters_path.exists() else ""
    )

    bible_path = project_path / "bible" / "series_bible.md"
    series_bible_text = (
        bible_path.read_text(encoding="utf-8", errors="replace")
        if bible_path.exists() else ""
    )

    print("\n  Running structural gates (first run or treatment changed)...")

    # Run gates — S1 sequential (Advocate A→B→Judge), S2/S4/S5 in parallel
    from concurrent.futures import ThreadPoolExecutor

    results = {}

    print("    S1: Crisis Dilemma Adversarial Test (Opus)...")
    results["s1_crisis_dilemma"] = check_s1_crisis_dilemma(client, treatment_text, series_bible_text)

    print("    S2/S4/S5: Running in parallel...")

    parallel_gates = {
        "s2_integration": (check_s2_integration, (client, treatment_text, series_bible_text)),
        "s4_shadow_mirror": (check_s4_shadow_mirror, (client, treatment_text, characters_text)),
        "s5_dig_deep_down": (check_s5_dig_deep_down, (client, treatment_text)),
    }

    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = {
            key: executor.submit(fn, *args)
            for key, (fn, args) in parallel_gates.items()
        }
        # Collect in submission order so the report and the cache file have a
        # stable gate order no matter which request finishes first.
        for key, future in futures.items():
            results[key] = future.result()

    # Cache results only when every gate was actually evaluated.
    if any(_gate_was_skipped(g) for g in results.values()):
        print("    (one or more gates could not be evaluated — results not cached)")
    else:
        _save_cache(project_path, results, current_hash)

    all_passed = all(g.get("passed", True) for g in results.values())
    return all_passed, results


def _print_structural_gate_results(structural_results):
    """Print one PASS/FAIL line per structural gate, with detail on failures.

    Args:
        structural_results: Non-skipped result dict from
            ``run_structural_gates``; the bookkeeping keys "cached", "skipped"
            and "reason" are ignored, every other key is treated as a gate.
    """
    print(f"\n{'─'*60}")
    print("STRUCTURAL GATES (S1-S5):")
    if structural_results.get("cached", False):
        print("  (cached — treatment.md unchanged)")

    for gate_key, gate_result in structural_results.items():
        if gate_key in ("cached", "skipped", "reason"):
            continue
        passed = gate_result.get("passed")
        status = "PASS" if passed else "FAIL"

        if gate_key == "s1_crisis_dilemma":
            verdict = gate_result.get("verdict", "?")
            print(f"  [{status}] S1 Crisis Dilemma: {verdict}")
            if not passed:
                print(f"         {gate_result.get('reason', '')[:80]}")
        elif gate_key == "s2_integration":
            print(f"  [{status}] S2 Integration Test")
            if not passed:
                print(f"         A-story can work without B-story: {gate_result.get('reason', '')[:80]}")
        elif gate_key == "s4_shadow_mirror":
            # The key may be present with value None (no score parsed), so a
            # .get() default alone is not enough to avoid printing "None/10".
            score = gate_result.get("score")
            if score is None:
                score = "?"
            print(f"  [{status}] S4 Shadow Mirror Quality: {score}/10 (min 7)")
            if not passed:
                print(f"         {gate_result.get('reason', '')[:80]}")
        elif gate_key == "s5_dig_deep_down":
            print(f"  [{status}] S5 Dig Deep Down Presence")
            if not passed:
                # Report the actual cause: either the resource was never
                # planted, or no transformation mechanism was found at all.
                if gate_result.get("planted_missing"):
                    planted = "NOT PLANTED"
                elif gate_result.get("has_mechanism") is False:
                    planted = "NOT FOUND"
                else:
                    planted = "planted"
                print(f"         Transformation mechanism: {planted}")


def main():
    """Command-line entry point for the pre-generation gate.

    Reads the project path from ``sys.argv[1]``, checks required and
    recommended files, runs the treatment structure check and (when all
    required files are present) the structural gates, prints a report, and
    exits:

    - 0: required files present (structural or treatment issues are warnings)
    - 1: one or more required files missing or too small
    - 2: no path argument, or the path does not exist / is not a directory
    """
    if len(sys.argv) < 2:
        print("Usage: python3 validate_pre_generation.py <project_path>")
        print("Example: python3 validate_pre_generation.py ./leviathan")
        sys.exit(2)

    project_path = Path(sys.argv[1]).resolve()

    if not project_path.exists():
        print(f"Error: Project path does not exist: {project_path}")
        sys.exit(2)
    if not project_path.is_dir():
        # A file passed as the project path is a path error, not a project
        # with every required file missing.
        print(f"Error: Project path is not a directory: {project_path}")
        sys.exit(2)

    # Check all required files
    missing_critical = []
    present_files = []

    for file_info in REQUIRED_FILES:
        result = check_file(project_path, file_info)

        if not result['exists']:
            missing_critical.append({
                **file_info,
                **result,
            })
        elif not result['has_content']:
            missing_critical.append({
                **file_info,
                **result,
                'issue': 'File exists but appears empty',
            })
        else:
            present_files.append(result)

    # Check recommended files
    missing_recommended = [
        file_info for file_info in RECOMMENDED_FILES
        if not check_file(project_path, file_info)['exists']
    ]

    # Additional treatment validation; returns None when treatment.md is absent
    treatment_validation = validate_treatment_structure(project_path)

    # Report
    print(f"\n{'='*60}")
    print("PRE-GENERATION GATE: Required Files Validation")
    print(f"Project: {project_path.name}")
    print(f"{'='*60}")

    # Show present files
    if present_files:
        print(f"\n✓ PRESENT ({len(present_files)}/{len(REQUIRED_FILES)}):")
        for f in present_files:
            size_kb = f['size'] / 1024
            print(f"  ✓ {f['name']}: {f['path']} ({size_kb:.1f} KB)")

    # Show missing files
    if missing_critical:
        print(f"\n✗ MISSING ({len(missing_critical)}):")
        for f in missing_critical:
            print(f"\n  ✗ {f['name']}: {f['path']}")
            print(f"    Purpose: {f['description']}")
            print(f"    Create with: {f['how_to_create']}")
            if 'issue' in f:
                print(f"    Issue: {f['issue']}")

    # Show treatment validation if applicable
    if treatment_validation:
        print(f"\n{'─'*60}")
        print("TREATMENT STRUCTURE CHECK:")
        print(f"  Episodes found: {treatment_validation['episode_count']}/60")
        print(f"  THE MOMENT markers: {treatment_validation['moment_count']}")
        if treatment_validation['issues']:
            print("\n  ⚠ Issues:")
            for issue in treatment_validation['issues']:
                print(f"    - {issue}")

    # Show recommended files
    if missing_recommended:
        print(f"\n{'─'*60}")
        print("RECOMMENDED (not blocking):")
        for f in missing_recommended:
            print(f"  ○ {f['name']}: {f['path']}")

    # --- Structural Gates S1-S5 (run only if files present) ---
    structural_passed = True

    if not missing_critical:
        structural_passed, structural_results = run_structural_gates(project_path)

        if structural_results and not structural_results.get("skipped"):
            _print_structural_gate_results(structural_results)
        elif structural_results and structural_results.get("skipped"):
            print(f"\n{'─'*60}")
            print(f"STRUCTURAL GATES: {structural_results.get('reason', 'skipped')}")

    # Final result
    print(f"\n{'='*60}")

    if missing_critical:
        print("PRE-GENERATION GATE: ✗ BLOCKED")
        print(f"\n{len(missing_critical)} required file(s) missing.")
        print("\nCANNOT PROCEED TO GENERATION")
        print("\nCreate the missing files before running /generate:")

        # Specific guidance for treatment.md
        if any(f['path'] == 'treatment.md' for f in missing_critical):
            print("\n  → treatment.md is the MASTER generation input.")
            print(f"    Run: /treatment {project_path.name}")
            print("    This creates the prose paragraphs, THE MOMENT,")
            print("    and cliffhanger images for all 60 episodes.")

        print(f"{'='*60}\n")
        sys.exit(1)
    elif not structural_passed:
        print("PRE-GENERATION GATE: ⚠ STRUCTURAL ISSUES")
        print("\nAll required files present, but structural gates flagged issues.")
        print("Review and address the FAIL items above before generating.")
        print("To re-run structural gates: delete state/structural_gate_results.json")
        print("\nGeneration may proceed with caution (structural issues are warnings).")
        print(f"{'='*60}\n")
        sys.exit(0)  # Warn but allow
    elif treatment_validation and not treatment_validation['valid']:
        print("PRE-GENERATION GATE: ⚠ WARNING")
        print("\nAll required files present, but treatment.md may be incomplete.")
        print("Review the issues above before proceeding.")
        print("\nGeneration may proceed with caution.")
        print(f"{'='*60}\n")
        sys.exit(0)  # Allow but warn
    else:
        print("PRE-GENERATION GATE: ✓ PASSED")
        print("\nAll required files present and validated.")
        print("Generation may proceed.")
        print(f"{'='*60}\n")
        sys.exit(0)


# Run the gate only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()