#!/usr/bin/env python3
"""
Intra-Episode Transition Test — Check Kill Box section transitions within episodes.

Quick test to see if THEREFORE/BUT/AND THEN analysis is useful WITHIN episodes,
not just between them. Analyzes the transitions between Kill Box sections:
  HOOK → SETUP → ESCALATION → TURN → CLIFFHANGER

Usage:
    python3 intra_episode_test.py <project> <start> <end> [--dry-run] [--model MODEL]
    python3 intra_episode_test.py leviathan 1 10
    python3 intra_episode_test.py leviathan 1 10 --dry-run

Requires:
    pip install google-generativeai
    export GEMINI_API_KEY="your-key-here"
"""

import argparse
import json
import os
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

from lib.model_registry import get_model

# Default Gemini model identifier: the default for call_gemini() and for the
# --model CLI flag. Resolved once at import time through the project's model
# registry; ("pro", "text") presumably selects a tier and a modality — TODO
# confirm against lib.model_registry.
DEFAULT_MODEL = get_model("pro", "text")


def find_project_root() -> Path:
    """Locate the Recoil root by walking upward from this script's directory.

    A directory qualifies as the root when it contains both a ``tools`` and an
    ``editors`` subdirectory. The search starts at the directory holding this
    file and moves toward the filesystem root; the filesystem root itself is
    never examined.

    Returns:
        The qualifying directory nearest to this file.

    Exits:
        Prints an error to stderr and terminates with status 1 when no
        examined directory qualifies.
    """
    here = Path(__file__).resolve().parent
    # The last entry of the upward chain is the filesystem root; drop it.
    candidates = (here, *here.parents)[:-1]
    for candidate in candidates:
        if all((candidate / name).is_dir() for name in ("tools", "editors")):
            return candidate
    print("ERROR: Could not find Recoil root.", file=sys.stderr)
    sys.exit(1)


def load_episodes(project_path: Path, start: int, end: int) -> dict:
    """Read every available episode script numbered ``start`` through ``end``.

    Episodes are looked up as ``episodes/ep_NNN.md`` under ``project_path``,
    with the number zero-padded to three digits. A missing file produces a
    warning on stderr and is simply left out of the result.

    Args:
        project_path: Directory containing the ``episodes`` folder.
        start: First episode number (inclusive).
        end: Last episode number (inclusive).

    Returns:
        A dict mapping episode number to the file's UTF-8 text, in ascending
        episode order.
    """
    source_dir = project_path / "episodes"
    loaded = {}
    for number in range(start, end + 1):
        path = source_dir / f"ep_{number:03d}.md"
        if not path.exists():
            print(f"WARNING: {path.name} not found", file=sys.stderr)
            continue
        loaded[number] = path.read_text(encoding="utf-8")
    return loaded


def build_prompt(episodes: dict) -> str:
    """Assemble the full analysis prompt sent to the model.

    The prompt is a fixed instruction block (role, the THEREFORE/BUT rule,
    per-boundary checks, severity scale and the required JSON output schema)
    followed by one ``## EPISODE <n>`` section per episode, in ascending
    episode-number order.

    Args:
        episodes: Mapping of episode number to that episode's script text.

    Returns:
        The prompt as a single string; pieces are joined with newlines.
    """
    # Static instructions. This text is sent to the model verbatim.
    instructions = """# ROLE

You are a script continuity editor analyzing INTERNAL scene transitions within individual
episodes of a vertical microdrama series. Each episode follows a 5-section "Kill Box" structure:

1. **THE HOOK** [00:00-00:05] — Immediate tension or mystery
2. **THE SETUP** [00:05-00:15] — Establish stakes, character in conflict
3. **THE ESCALATION** [00:15-00:40] — Pressure increases, new obstacle
4. **THE TURN** [00:40-00:70] — Unexpected shift, stakes raised
5. **THE CLIFFHANGER** [00:70-00:90] — Cut mid-action or aftermath

Your job: analyze every transition between adjacent Kill Box sections WITHIN each episode.

# THE RULE: THEREFORE / BUT — NEVER "AND THEN"

This is the South Park rule (Trey Parker & Matt Stone): every scene transition should
connect via THEREFORE (causal consequence) or BUT (complication/subversion), never
AND THEN (merely chronological sequence).

Apply this to every Kill Box boundary:
- HOOK → SETUP: Does the setup flow from the hook? Or just follow it?
- SETUP → ESCALATION: Does the escalation arise from what's established? Or appear arbitrarily?
- ESCALATION → TURN: Does the turn subvert or complicate the escalation? Or just happen next?
- TURN → CLIFFHANGER: Does the cliffhanger emerge from the turn? Or feel disconnected?

**THEREFORE** — Section B happens BECAUSE of Section A.
Example: HOOK shows Jinx finding a body → THEREFORE → SETUP has her scanning for the killer

**BUT** — Section B complicates or subverts Section A.
Example: SETUP shows them hiding successfully → BUT → ESCALATION reveals they're hiding in the wrong place

**AND THEN** — Section B merely follows Section A with no causal link.
Example: ESCALATION shows a chase → AND THEN → TURN introduces a completely unrelated character

# WHAT TO CHECK AT EACH INTERNAL BOUNDARY

For each Kill Box section transition, evaluate:

1. **Causal Logic** — Does the next section arise from the previous one?
   - Is there a cause-effect relationship?
   - Does the audience understand WHY we moved to this new beat?

2. **Escalation Quality** — Does pressure genuinely increase?
   - Each section should ratchet stakes higher
   - Flat transitions (same stakes, just new activity) are AND THEN

3. **Spatial Logic** — If the location changes within the episode, is it motivated?
   - Internal location shifts should have clear reason

4. **Emotional Momentum** — Does the emotional intensity build?
   - Drops in emotional intensity between sections may indicate AND THEN

5. **Information Flow** — Does new information drive the next beat?
   - THEREFORE: "She sees the tracker" → therefore she runs
   - AND THEN: "She sees the tracker" → and then she's in a different room

# SEVERITY

- **P1**: The section transition breaks the episode's internal logic. Audience is confused about why we're in this new beat.
- **P2**: The transition is inferrable but lazy. The escalation is more "next thing happens" than "this CAUSES the next thing."
- **P3**: Minor. Works but could be tighter — a stronger causal connector would improve flow.

# OUTPUT FORMAT

Return ONLY valid JSON (no markdown fences):

{
  "findings": [
    {
      "episode": 3,
      "transition": "SETUP → ESCALATION",
      "connector": "AND THEN",
      "severity": "P1|P2|P3",
      "section_a_summary": "Brief description of what happens at end of Section A",
      "section_b_summary": "Brief description of what happens at start of Section B",
      "description": "What's wrong and why it matters",
      "suggested_fix": "How to strengthen the causal link"
    }
  ],
  "episode_grades": [
    {
      "episode": 1,
      "grade": "A|B|C|D|F",
      "internal_flow": "Brief assessment of how well the Kill Box sections connect",
      "weakest_transition": "Which boundary is weakest (e.g., 'ESCALATION → TURN')",
      "transition_breakdown": {
        "hook_to_setup": "THEREFORE|BUT|AND THEN",
        "setup_to_escalation": "THEREFORE|BUT|AND THEN",
        "escalation_to_turn": "THEREFORE|BUT|AND THEN",
        "turn_to_cliffhanger": "THEREFORE|BUT|AND THEN"
      }
    }
  ],
  "summary": {
    "total_transitions_checked": 40,
    "therefore_count": 0,
    "but_count": 0,
    "and_then_count": 0,
    "p1_count": 0,
    "p2_count": 0,
    "p3_count": 0,
    "overall_assessment": "1-2 sentence summary"
  }
}

**IMPORTANT:**
- Only report transitions with issues (AND THEN or weak causal links).
- Grade EVERY episode regardless of whether it has issues.
- Be specific about WHAT breaks the causal chain.
- Suggested fixes should be concrete — not "make it more causal" but "add a line where X triggers Y."

# EPISODES
"""

    sections = [instructions]
    # One section per episode, lowest episode number first.
    sections.extend(
        f"\n## EPISODE {number}\n\n{episodes[number]}\n"
        for number in sorted(episodes)
    )
    return "\n".join(sections)


def call_gemini(prompt: str, model: str = DEFAULT_MODEL) -> str:
    """Send ``prompt`` to a Gemini model and return the text of its reply.

    The request uses temperature 0.3, a 16384-token output cap, and asks for
    an ``application/json`` response. Progress and the elapsed wall-clock time
    are printed to stdout.

    Args:
        prompt: Full prompt text to submit.
        model: Model name passed to ``genai.GenerativeModel``.

    Returns:
        The ``text`` attribute of the SDK response.

    Exits:
        Prints an error to stderr and terminates with status 1 when the
        ``GEMINI_API_KEY`` environment variable is unset or empty.
    """
    # Imported here rather than at module top, so the SDK is only needed
    # when a request is actually made.
    import google.generativeai as genai

    key = os.getenv("GEMINI_API_KEY")
    if not key:
        print("ERROR: GEMINI_API_KEY not set.", file=sys.stderr)
        sys.exit(1)
    genai.configure(api_key=key)

    generation_settings = genai.GenerationConfig(
        temperature=0.3,
        max_output_tokens=16384,
        response_mime_type="application/json",
    )
    client = genai.GenerativeModel(model)

    print(f"Calling Gemini ({model})...", flush=True)
    began = time.time()
    reply = client.generate_content(prompt, generation_config=generation_settings)
    duration = time.time() - began
    print(f"Response received in {duration:.1f}s", flush=True)

    return reply.text


def _parse_results(response_text: str) -> dict:
    """Decode the model reply into a results dict.

    Surrounding markdown code fences, if present, are stripped first.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError`` subclass) or the top-level value is not a
            JSON object.
    """
    cleaned = response_text.strip()
    if cleaned.startswith("```"):
        cleaned = re.sub(r"^```\w*\n?", "", cleaned)
        cleaned = re.sub(r"\n?```$", "", cleaned)
    results = json.loads(cleaned)
    if not isinstance(results, dict):
        # Metadata keys are added to the result later, so it must be an object.
        raise ValueError(f"expected a JSON object, got {type(results).__name__}")
    return results


def _dict_entries(value) -> list:
    """Return the dict items of ``value`` if it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def _episode_sort_key(entry: dict) -> tuple:
    """Sort key that orders integer episodes numerically and anything else after."""
    ep = entry.get("episode")
    if isinstance(ep, int):
        return (0, ep, "")
    return (1, 0, str(ep))


def _print_report(results: dict, project: str, start: int, end: int) -> None:
    """Print the summary, per-episode grades and findings to stdout.

    The data comes from a model, so missing, null or wrongly-typed fields are
    tolerated and shown as ``?`` or skipped instead of raising.
    """
    summary = results.get("summary")
    if not isinstance(summary, dict):
        summary = {}
    findings = _dict_entries(results.get("findings"))
    grades = _dict_entries(results.get("episode_grades"))

    rule = "=" * 60
    print(f"\n{rule}")
    print(f"INTRA-EPISODE TRANSITION TEST — {project} ep {start}-{end}")
    print(rule)
    print(f"Transitions checked: {summary.get('total_transitions_checked', '?')}")
    print(f"THEREFORE: {summary.get('therefore_count', '?')}")
    print(f"BUT:       {summary.get('but_count', '?')}")
    print(f"AND THEN:  {summary.get('and_then_count', '?')}")
    print(f"P1 issues: {summary.get('p1_count', '?')}")
    print(f"P2 issues: {summary.get('p2_count', '?')}")
    print(f"P3 issues: {summary.get('p3_count', '?')}")
    print(f"\n{summary.get('overall_assessment', '')}")
    print(rule)

    if grades:
        print("\nEPISODE GRADES:")
        for g in sorted(grades, key=_episode_sort_key):
            ep = g.get("episode", "?")
            grade = g.get("grade", "?")
            weak = g.get("weakest_transition", "none")
            tb = g.get("transition_breakdown")
            if not isinstance(tb, dict):
                tb = {}
            connectors = " | ".join([
                f"H→S:{tb.get('hook_to_setup', '?')}",
                f"S→E:{tb.get('setup_to_escalation', '?')}",
                f"E→T:{tb.get('escalation_to_turn', '?')}",
                f"T→C:{tb.get('turn_to_cliffhanger', '?')}",
            ])
            # "!s" converts to str before padding: a "d" spec would raise on
            # the "?" placeholder or on a non-integer episode value.
            print(f"  EP {ep!s:>2}: [{grade}] {connectors}")
            if weak != "none":
                print(f"         Weakest: {weak}")

    if findings:
        print(f"\nFINDINGS ({len(findings)}):")
        for finding in findings:
            sev = finding.get("severity", "?")
            ep = finding.get("episode", "?")
            trans = finding.get("transition", "?")
            conn = finding.get("connector", "?")
            marker = "!!" if sev == "P1" else "!" if sev == "P2" else " "
            print(f"\n{marker} EP {ep} [{sev}] {trans} [{conn}]")
            print(f"  A: {finding.get('section_a_summary', '')}")
            print(f"  B: {finding.get('section_b_summary', '')}")
            print(f"  {finding.get('description', '')}")
            if finding.get("suggested_fix"):
                print(f"  FIX: {finding['suggested_fix']}")
    else:
        print("\nNo issues found — all internal transitions are causal.")


def main() -> None:
    """CLI entry point: build the prompt, query the model, save and print results.

    Steps:
      1. Parse ``project start end [--dry-run] [--model]`` from the command line.
      2. Load the episodes and build the prompt.
      3. With ``--dry-run``, write the prompt to
         ``<project>/state/intra_episode_test_payload.txt`` and stop.
      4. Otherwise call the model, parse its JSON reply, add run metadata and
         write ``<project>/state/intra_episode_test.json``.
      5. Print a human-readable report.

    If the reply cannot be parsed as a JSON object, a warning is printed and
    the raw reply is saved to ``<project>/state/intra_episode_test_raw.txt``.

    Exits with status 1 when the project directory does not exist or no
    episode in the requested range could be loaded.
    """
    parser = argparse.ArgumentParser(description="Intra-Episode Transition Test")
    parser.add_argument("project", help="Project directory name")
    parser.add_argument("start", type=int, help="Start episode number")
    parser.add_argument("end", type=int, help="End episode number")
    parser.add_argument("--dry-run", action="store_true", help="Save prompt without calling API")
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Gemini model")
    args = parser.parse_args()

    root = find_project_root()
    project_path = root / args.project
    # Fail before state_dir.mkdir() below can create a directory tree for a
    # mistyped project name.
    if not project_path.is_dir():
        print(f"ERROR: Project directory not found: {project_path}", file=sys.stderr)
        sys.exit(1)

    episodes = load_episodes(project_path, args.start, args.end)
    print(f"Loaded {len(episodes)} episodes (ep {args.start}-{args.end})")
    # Without episodes the prompt has nothing to analyze; skip the API call.
    if not episodes:
        print(f"ERROR: No episodes found in range {args.start}-{args.end}.", file=sys.stderr)
        sys.exit(1)

    prompt = build_prompt(episodes)
    word_count = len(prompt.split())
    # Token figure is a rough estimate: 1.3 tokens per whitespace-separated word.
    print(f"Prompt: {word_count:,} words (~{int(word_count * 1.3):,} tokens)")

    state_dir = project_path / "state"
    state_dir.mkdir(parents=True, exist_ok=True)

    if args.dry_run:
        payload_path = state_dir / "intra_episode_test_payload.txt"
        payload_path.write_text(prompt, encoding="utf-8")
        print(f"Payload saved to {payload_path}")
        return

    response_text = call_gemini(prompt, args.model)

    try:
        results = _parse_results(response_text)
    except ValueError as e:
        print(f"WARNING: Could not parse JSON: {e}", file=sys.stderr)
        raw_path = state_dir / "intra_episode_test_raw.txt"
        raw_path.write_text(response_text, encoding="utf-8")
        print(f"Raw response saved to {raw_path}")
        return

    # Add metadata
    results["generated"] = datetime.now(timezone.utc).isoformat()
    results["model"] = args.model
    results["project"] = args.project
    results["episode_range"] = [args.start, args.end]

    # Save before printing so a display problem can never lose the results.
    output_path = state_dir / "intra_episode_test.json"
    output_path.write_text(json.dumps(results, indent=2), encoding="utf-8")
    print(f"\nResults saved to {output_path}")

    _print_report(results, args.project, args.start, args.end)


# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()