#!/usr/bin/env python3
"""
Trim Pass — Gemini rewrites episodes to cut unfilmable content and replace
with visual action.

Cuts:
  - Repeated behavioral descriptions (batch echo)
  - Internal psychology that can't be filmed
  - Exposition where characters narrate the visible

Replaces with:
  - Filmable reaction shots and physical behavior
  - Visual action beats that SHOW emotion
  - Camera-ready descriptions

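Usage:
    python3 trim_pass.py [project]                    # Full trim: every ep_*.md in the project (e.g. all 60 episodes)
    python3 trim_pass.py [project] --episodes 1-10    # Specific range (a single number such as 5 also works)
    python3 trim_pass.py [project] --dry-run           # Preview one episode, don't overwrite
    python3 trim_pass.py [project] --model MODEL       # Override Gemini model
    python3 trim_pass.py [project] --no-backup         # Skip the episodes_backup_<timestamp> copy (dangerous)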

Requires:
    pip install google-generativeai
    export GEMINI_API_KEY="your-key-here"
"""

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path

from lib.model_registry import get_model

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Default Gemini model name, looked up through lib.model_registry.get_model;
# can be overridden per run with --model.
DEFAULT_MODEL = get_model("flash", "text")
# Note: Gemini safety filters block violent screenplay content (throat grabs, etc.)
# even with BLOCK_NONE. For Leviathan, use Claude sub-agents instead:
#   python3 trim_pass.py leviathan --engine claude
# NOTE(review): the argument parser in this file defines no `--engine` option,
# so the command above would be rejected as written — confirm where the Claude
# path actually lives.
# NOTE(review): TOKENS_PER_WORD is not referenced anywhere else in this file.
TOKENS_PER_WORD = 1.3
RATE_LIMIT_DELAY = 4.5  # seconds between requests (safe for 15 RPM)

# Engine root (relative to this script)
# SCRIPT_DIR is the directory containing this file; episode_metrics.py is
# looked up there by validate_episode().
SCRIPT_DIR = Path(__file__).resolve().parent
# Two levels above SCRIPT_DIR; project folders are resolved as
# ENGINE_ROOT / <project> in main().
ENGINE_ROOT = SCRIPT_DIR.parent.parent


def p(msg):
    """Print *msg* on standard output and flush right away.

    Flushing on every call keeps progress lines visible immediately when the
    output is piped or redirected.
    """
    print(msg, file=sys.stdout, flush=True)


def get_genai():
    """Return the ``google.generativeai`` module, configured with an API key.

    The key is taken from ``GEMINI_API_KEY`` and, if that is unset or empty,
    from ``GOOGLE_API_KEY``. When neither provides a value an error is printed
    and the process exits with status 1.
    """
    # Imported lazily so the SDK is only required once a run actually starts.
    import google.generativeai as genai

    api_key = None
    for env_name in ("GEMINI_API_KEY", "GOOGLE_API_KEY"):
        api_key = os.environ.get(env_name)
        if api_key:
            break

    if api_key:
        genai.configure(api_key=api_key)
        return genai

    p("ERROR: No API key. Set GEMINI_API_KEY or GOOGLE_API_KEY.")
    sys.exit(1)


def get_safety_settings():
    """Return safety settings that request BLOCK_NONE for four harm categories.

    When ``google.generativeai.types`` can be imported the result is a dict
    keyed by ``HarmCategory`` members with ``HarmBlockThreshold.BLOCK_NONE``
    values; otherwise it is a list of plain ``{"category", "threshold"}``
    string dicts covering the same categories in the same order.
    """
    category_names = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    try:
        from google.generativeai.types import HarmCategory, HarmBlockThreshold
        return {
            getattr(HarmCategory, name): HarmBlockThreshold.BLOCK_NONE
            for name in category_names
        }
    except ImportError:
        # Enum types unavailable — fall back to the string spelling.
        return [
            {"category": name, "threshold": "BLOCK_NONE"}
            for name in category_names
        ]


def load_character_bible(project_dir):
    """Return the text of ``<project_dir>/bible/characters.md``.

    Args:
        project_dir: ``pathlib.Path`` of the project folder.

    Returns:
        The file contents, or ``""`` (after printing a warning) when the file
        does not exist.
    """
    bible_path = project_dir / "bible" / "characters.md"
    if not bible_path.exists():
        p(f"WARNING: No character bible at {bible_path}")
        return ""
    # Decode explicitly as UTF-8: the platform default encoding is
    # locale-dependent and mis-decodes em-dashes/arrows on non-UTF-8 locales.
    return bible_path.read_text(encoding="utf-8")


def load_script_doctor_findings(project_dir):
    """Summarise ``<project_dir>/state/script_doctor_brief.json`` for the prompt.

    Args:
        project_dir: ``pathlib.Path`` of the project folder.

    Returns:
        A header line followed by one ``- [severity] title: description`` bullet
        per entry under the brief's ``"findings"`` key. Returns ``""`` when the
        brief file is missing or cannot be parsed (a warning is printed in the
        latter case).
    """
    brief_path = project_dir / "state" / "script_doctor_brief.json"
    if not brief_path.exists():
        return ""

    # The brief is optional context, so an unreadable file is reported and
    # skipped rather than aborting the run before any episode is processed.
    try:
        brief = json.loads(brief_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        p(f"WARNING: Could not parse {brief_path}: {exc}")
        return ""

    lines = ["SCRIPT DOCTOR FINDINGS (patterns to cut):\n"]
    for finding in brief.get("findings", []):
        # Tolerate findings that omit a field instead of raising KeyError.
        severity = finding.get("severity", "unknown")
        title = finding.get("title", "untitled")
        description = finding.get("description", "")
        lines.append(f"- [{severity}] {title}: {description}")
    return "\n".join(lines)


def load_episode(ep_path):
    """Return the complete text of the episode file at *ep_path*.

    The file is opened in text mode with the platform's default encoding.
    """
    with ep_path.open("r") as handle:
        return handle.read()


def build_trim_prompt(episode_text, character_bible, findings_summary):
    """Assemble the full trim-pass instruction prompt for one episode.

    Args:
        episode_text: The episode to rewrite; inserted verbatim under
            "EPISODE TO TRIM".
        character_bible: Character reference text; only its first 3000
            characters are inserted.
        findings_summary: Text inserted verbatim under
            "KNOWN PATTERNS TO TARGET" (may be empty).

    Returns:
        The prompt as a single string.
    """
    # The wording below is sent to the model as-is. Note that it hard-codes
    # the character names Jinx, Kian and Varek and a 450-500 word target.
    return f"""You are doing a TRIM PASS on a microdrama episode. This is a vertical short-form drama (9:16) that will be produced as AI-generated video. Every line of action description MUST be filmable.

## YOUR JOB

1. **CUT** repeated behavioral descriptions that appear across multiple episodes:
   - "broken bark laugh" / "involuntary bark" → replace with a SPECIFIC laugh for THIS moment
   - "adjusts his cuffs" if it appears as rote habit → keep ONLY if it reveals something new
   - Gambling metaphors restated generically ("bad odds," "sixty-forty") → replace with situation-specific dialogue

2. **CUT** internal psychology that cannot be filmed:
   - "Something shifts inside him" → REPLACE with a visible reaction: a flinch, a breath, a hand tightening
   - "She feels the weight of..." → REPLACE with physical behavior: shoulders drop, jaw sets, eyes go flat
   - "He realizes..." → REPLACE with what the CAMERA SEES: his hand stops mid-reach, his eyes track to the door
   - Any sentence that describes a feeling, thought, or internal state

3. **CUT** exposition where characters narrate the visible:
   - If a character says what the audience can already see, cut the line
   - If dialogue explains backstory that could be shown through behavior, replace with action

4. **REPLACE** every cut with filmable action:
   - Reaction shots: "Jinx's eyes flicker back and forth as she considers the option"
   - Physical behavior: "Kian's remaining hand closes into a fist, servos whining"
   - Environmental detail: "Condensation drips from a cracked pipe. The corridor breathes"
   - Micro-expressions: "Varek's jaw tightens — barely visible, immediately controlled"

## RULES

- **WORD COUNT IS MANDATORY: The output MUST be between 450-500 words.** Count the words in your output before returning it. If you cut 40 words of psychology, you MUST add 40 words of filmable action. Do NOT return anything under 450 words. This is the single most important constraint.
- Keep ALL dialogue exchanges intact. You may tighten wording but NEVER remove an exchange.
- Keep the Kill Box structure exactly: HOOK, SETUP, ESCALATION, TURN, CLIFFHANGER with timestamps
- Keep the header (title, word count, dialogue %) and footer metadata exactly — update word count and dialogue % to match your edits
- Keep character voice — Jinx is sharp and sardonic, Kian is clinical and direct, Varek is controlled and menacing
- Do NOT add new dialogue. Do NOT change plot beats. Do NOT alter what happens in the scene.
- Do NOT add emotional commentary or authorial voice ("we see...", "the audience feels...")
- Every sentence you write must pass the CAMERA TEST: "Could a camera capture this?" If no, rewrite it.

## CHARACTER BIBLE (for voice reference)

{character_bible[:3000]}

## KNOWN PATTERNS TO TARGET

{findings_summary}

## EPISODE TO TRIM

{episode_text}

## OUTPUT

Return ONLY the complete trimmed episode text. Same format, same structure. No commentary, no explanations, no markdown code fences. Just the episode."""


def trim_episode(genai, model_name, episode_text, character_bible, findings_summary,
                 max_attempts=3):
    """Send one episode to Gemini for trimming and return the rewritten text.

    Args:
        genai: The configured ``google.generativeai`` module (see get_genai()).
        model_name: Name of the Gemini model to instantiate.
        episode_text: Original episode text.
        character_bible: Character reference text for the prompt.
        findings_summary: Script-doctor summary for the prompt.
        max_attempts: How many requests to make before giving up (minimum 1).

    Returns:
        The model's response text with surrounding whitespace stripped; never
        an empty string.

    Raises:
        RuntimeError: If no attempt produced any text.
    """
    prompt = build_trim_prompt(episode_text, character_bible, findings_summary)
    model = genai.GenerativeModel(model_name)
    safety = get_safety_settings()
    attempts = max(1, max_attempts)
    reason = "unknown"

    for attempt in range(attempts):
        response = model.generate_content(
            prompt,
            generation_config={
                "temperature": 0.3 + (attempt * 0.1),  # Slightly increase temp on retry
                "max_output_tokens": 4096,
            },
            safety_settings=safety,
        )
        try:
            # A ValueError from .text is treated as "no usable content"
            # (e.g. a safety block), exactly as before.
            text = response.text.strip()
        except ValueError:
            text = ""
        if text:
            return text

        # Empty or whitespace-only text is handled like a blocked response:
        # returning it would let the caller overwrite the episode with nothing.
        reason = "unknown"
        if response.candidates:
            reason = getattr(response.candidates[0], "finish_reason", "unknown")
        if attempt < attempts - 1:
            p(f"  Retry {attempt+1}/{attempts-1} (finish_reason: {reason})...")
            time.sleep(2)

    raise RuntimeError(
        f"Gemini returned no content after {attempts} attempts (finish_reason: {reason})"
    )


def validate_episode(ep_path):
    """Check an episode with ``episode_metrics.py`` and report the outcome.

    Returns a ``(is_valid, details)`` tuple:
      * validator script missing  -> ``(True, "validator not found, skipping")``
      * validator exits non-zero  -> ``(False, <its stderr, stripped>)``
      * stdout is not JSON        -> ``(True, "could not parse validator output")``
      * otherwise                 -> the report's ``"is_valid"`` value (False
        when absent) and the report re-serialised with two-space indentation.
    """
    metrics_script = SCRIPT_DIR / "episode_metrics.py"
    if not metrics_script.exists():
        return True, "validator not found, skipping"

    # Run the validator under the same interpreter as this script.
    command = [sys.executable, str(metrics_script), str(ep_path), "--json"]
    completed = subprocess.run(command, capture_output=True, text=True)
    if completed.returncode != 0:
        return False, completed.stderr.strip()

    try:
        report = json.loads(completed.stdout)
    except json.JSONDecodeError:
        return True, "could not parse validator output"
    return report.get("is_valid", False), json.dumps(report, indent=2)


def _episode_digits(ep_path):
    """Return the first run of digits in *ep_path*'s stem, or None if it has none."""
    match = re.search(r'(\d+)', ep_path.stem)
    return match.group() if match else None


def _parse_episode_range(spec):
    """Parse ``"N"`` or ``"N-M"`` into an inclusive ``(start, end)`` pair of ints.

    Raises ValueError for any other shape, non-numeric parts, or start > end.
    """
    parts = spec.split("-")
    if len(parts) == 1:
        start = end = int(parts[0])
    elif len(parts) == 2:
        start, end = int(parts[0]), int(parts[1])
    else:
        raise ValueError("expected N or N-M")
    if start > end:
        raise ValueError(f"start {start} is greater than end {end}")
    return start, end


def main():
    """Command-line entry point: trim the selected episodes of one project.

    Parses arguments, selects ``ep_*.md`` files under
    ``ENGINE_ROOT/<project>/episodes``, optionally backs the directory up,
    sends each episode through trim_episode(), overwrites the file (unless
    ``--dry-run``), validates it, prints a summary, and writes
    ``state/trim_pass_results.json`` (skipped on a dry run).

    Exits with status 1 when the project, its episodes directory, or any
    matching episode cannot be found, and with status 2 (via argparse) when
    ``--episodes`` is malformed.
    """
    parser = argparse.ArgumentParser(description="Trim pass — cut unfilmable content, replace with visual action")
    parser.add_argument("project", help="Project folder name")
    parser.add_argument("--episodes", help="Episode range (e.g., 1-10, 5, 20-30)", default=None)
    parser.add_argument("--dry-run", action="store_true", help="Trim one episode to preview, don't overwrite")
    parser.add_argument("--model", default=DEFAULT_MODEL, help=f"Gemini model (default: {DEFAULT_MODEL})")
    parser.add_argument("--no-backup", action="store_true", help="Skip backup (dangerous)")
    args = parser.parse_args()

    # Reject a malformed range with a usage error instead of a traceback.
    episode_range = None
    if args.episodes:
        try:
            episode_range = _parse_episode_range(args.episodes)
        except ValueError as exc:
            parser.error(f"invalid --episodes value {args.episodes!r}: {exc}")

    # Resolve project directory
    project_dir = ENGINE_ROOT / args.project
    if not project_dir.exists():
        p(f"ERROR: Project directory not found: {project_dir}")
        sys.exit(1)

    episodes_dir = project_dir / "episodes"
    if not episodes_dir.exists():
        p(f"ERROR: No episodes directory: {episodes_dir}")
        sys.exit(1)

    # Find episodes; a file without a number in its name cannot be selected
    # or reported, so skip it up front rather than crash mid-run.
    all_eps = []
    for candidate in sorted(episodes_dir.glob("ep_*.md")):
        if _episode_digits(candidate) is None:
            p(f"WARNING: Skipping {candidate.name} — no episode number in filename")
            continue
        all_eps.append(candidate)
    if not all_eps:
        p("ERROR: No episodes found.")
        sys.exit(1)

    # Apply episode range
    if episode_range is not None:
        first, last = episode_range
        eps_to_process = [ep for ep in all_eps if first <= int(_episode_digits(ep)) <= last]
        if not eps_to_process:
            # Stop before making a backup or dividing by zero in the summary.
            p(f"ERROR: No episodes match --episodes {args.episodes}.")
            sys.exit(1)
    else:
        eps_to_process = all_eps

    if args.dry_run:
        eps_to_process = eps_to_process[:1]

    p("=" * 60)
    p(f"TRIM PASS — {args.project}")
    p(f"Model: {args.model}")
    p(f"Episodes: {len(eps_to_process)} of {len(all_eps)}")
    if args.dry_run:
        p("** DRY RUN — preview only, no files changed **")
    p("=" * 60)

    # Load context
    p("\nLoading context...")
    character_bible = load_character_bible(project_dir)
    findings_summary = load_script_doctor_findings(project_dir)
    p(f"  Character bible: {len(character_bible)} chars")
    p(f"  Script doctor findings: {'loaded' if findings_summary else 'none'}")

    # Backup originals
    if not args.dry_run and not args.no_backup:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_dir = project_dir / f"episodes_backup_{timestamp}"
        p(f"\nBacking up originals → {backup_dir.name}/")
        shutil.copytree(episodes_dir, backup_dir)

    # Initialize Gemini
    genai = get_genai()

    # Process episodes
    results = {"success": 0, "failed": 0, "validation_warnings": 0, "episodes": []}
    start_time = time.time()

    for i, ep_path in enumerate(eps_to_process):
        ep_num = _episode_digits(ep_path)
        p(f"\n[{i+1}/{len(eps_to_process)}] Episode {ep_num}...")

        try:
            # Reading happens inside the try so one unreadable file is
            # recorded as a failure instead of aborting the whole batch.
            original_text = load_episode(ep_path)
            original_words = len(original_text.split())

            trimmed = trim_episode(genai, args.model, original_text, character_bible, findings_summary)

            # Clean any markdown fences Gemini might add
            if trimmed.startswith("```"):
                trimmed = re.sub(r'^```\w*\n', '', trimmed)
                trimmed = re.sub(r'\n```$', '', trimmed)

            # Never replace an episode with nothing.
            if not trimmed.strip():
                raise RuntimeError("model returned empty episode text; original left untouched")

            trimmed_words = len(trimmed.split())
            delta = trimmed_words - original_words
            delta_pct = (delta / original_words * 100) if original_words > 0 else 0

            p(f"  {original_words} → {trimmed_words} words ({delta_pct:+.0f}%)")

            if not args.dry_run:
                ep_path.write_text(trimmed + "\n")

                # Validate
                is_valid, _details = validate_episode(ep_path)
                if not is_valid:
                    p("  ⚠ Validation warning — may need manual review")
                    results["validation_warnings"] += 1
            else:
                # Dry run — print a preview
                lines = trimmed.split("\n")
                p("  --- PREVIEW (first 20 lines) ---")
                for line in lines[:20]:
                    p(f"  | {line}")
                if len(lines) > 20:
                    p(f"  | ... ({len(lines) - 20} more lines)")
                p("  --- END PREVIEW ---")

            results["success"] += 1
            results["episodes"].append({
                "episode": int(ep_num),
                "original_words": original_words,
                "trimmed_words": trimmed_words,
                "delta_pct": round(delta_pct, 1),
            })

        except Exception as e:
            # Per-episode boundary: report, record, and move on to the next one.
            p(f"  ERROR: {e}")
            results["failed"] += 1
            results["episodes"].append({
                "episode": int(ep_num),
                "error": str(e),
            })

        # Rate limiting (skip on last episode)
        if i < len(eps_to_process) - 1:
            time.sleep(RATE_LIMIT_DELAY)

    elapsed = time.time() - start_time

    # Summary (eps_to_process is guaranteed non-empty here)
    p(f"\n{'=' * 60}")
    p("TRIM PASS — SUMMARY")
    p(f"{'=' * 60}")
    p(f"  Processed:  {results['success']} episodes")
    p(f"  Failed:     {results['failed']}")
    p(f"  Warnings:   {results['validation_warnings']}")
    p(f"  Time:       {elapsed:.0f}s ({elapsed/len(eps_to_process):.1f}s per episode)")

    deltas = [e["delta_pct"] for e in results["episodes"] if "delta_pct" in e]
    if deltas:
        avg_delta = sum(deltas) / len(deltas)
        p(f"  Avg change: {avg_delta:+.1f}%")

    p(f"{'=' * 60}")

    # A dry run promises that no files change, so do not overwrite the
    # results of an earlier real run.
    if args.dry_run:
        p("\nDry run — results not saved.")
        return

    # Save results
    results_path = project_dir / "state" / "trim_pass_results.json"
    # The state directory may not exist yet; create it rather than fail
    # after all the episodes have already been processed.
    results_path.parent.mkdir(parents=True, exist_ok=True)
    results["timestamp"] = datetime.now().isoformat()
    results["model"] = args.model
    results["project"] = args.project
    with open(results_path, "w") as f:
        json.dump(results, f, indent=2)
    p(f"\nResults saved to: {results_path}")


# Run the trim pass only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
