#!/usr/bin/env python3
"""
Arc Validation Gate Tool
Validates structure_outline.md, plant_payoff_plan.md, and treatment-format episode_arc.md.

Usage:
    python validate_arc.py <project_path>                    # Standard arc validation
    python validate_arc.py <project_path> --treatment        # Treatment format validation
    python validate_arc.py <project_path> --flag-weak        # Flag weak treatment episodes
    python validate_arc.py <project_path> --pairwise         # Generate pairwise comparison prompts
    python validate_arc.py ../projects/leviathan/development

Standard Mode Requirements:
- Cliffhanger intensity ≥9.0 overall, ≥8.5 per sequence
- Action density ≥20 beats, all sequences have ≥1
- All 11 emotional beats at designated episodes
- No 4+ consecutive same-type cliffhangers (max 3 allowed; see CONSTANTS.md)
- Plant/Payoff: ≥6 threads, ≥1 Object/Phrase/Image, ≥1 echo moment

Treatment Mode Requirements (Enriched Format):
- All 60 episodes have metadata headers (Beat, Hook, Cliffhanger)
- All episodes have rich descriptions (Event, Emotional beat, Relationship)
- Thread markers reference THREAD INDEX
- Hook distribution 70-85% SILENT
- Cliffhanger distribution 70-85% mid-action
- No 4+ consecutive same type (max 3 allowed)

Pairwise Mode (--pairwise):
Generates pairwise comparison prompts for subjective scoring:
- Cliffhanger intensity ranking (tournament bracket methodology)
- Tier 2 scored gates (tension, reversals, stakes, variety, pacing)
Uses reasoning-before-judgment methodology from pairwise_comparison.md.
"""

import json
import sys
import os
import re
from pathlib import Path
from collections import defaultdict

# Import constants from shared module.
# The directory containing this script is pushed to the front of sys.path so
# that an `engine_constants` module sitting next to it can be imported even
# when the script is launched from another working directory.
_SCRIPT_DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(_SCRIPT_DIR))
try:
    from engine_constants import (
        WORD_COUNT_MIN, WORD_COUNT_MAX,
        DIALOGUE_MAX_PERCENT, MAX_EXCHANGES,
        MAX_CONSECUTIVE_SAME_TYPE,
        ANTHROPIC_HAIKU,
        llm_gate_call,
        parse_llm_field,
    )
except ImportError:
    # Stand-alone fallbacks used when engine_constants cannot be imported.
    WORD_COUNT_MIN = 450
    WORD_COUNT_MAX = 500
    DIALOGUE_MAX_PERCENT = 40
    MAX_EXCHANGES = 8
    MAX_CONSECUTIVE_SAME_TYPE = 3
    ANTHROPIC_HAIKU = "claude-haiku-4-5-20251001"
    # No-op LLM call: always returns None, which makes
    # track_semantic_escalation() use its keyword-based classifier.
    llm_gate_call = lambda prompt, *, model, max_tokens=200: None
    # None signals "no field parser available"; track_semantic_escalation()
    # then splits the raw response on "BUCKET:" itself.
    parse_llm_field = None

# ANSI escape sequences used by print_report() to colour terminal output.
# RESET must follow every coloured span to restore the default style.
GREEN = '\033[92m'
RED = '\033[91m'
YELLOW = '\033[93m'
CYAN = '\033[96m'
BOLD = '\033[1m'
RESET = '\033[0m'

def parse_structure_outline(filepath):
    """Parse structure_outline.md and extract per-episode cliffhanger data.

    The file is scanned line by line. A ``# SEQUENCE N`` header switches the
    current sequence; every later ``Action Beats: K`` line and episode table
    row is attributed to that sequence until the next header.

    Args:
        filepath: Path (str or Path) to structure_outline.md.

    Returns:
        ``(data, None)`` on success, or ``(None, error_message)`` when the
        file does not exist. ``data`` is a dict with:

        - ``episodes``: list of ``{'episode', 'type', 'intensity', 'sequence'}``
          in file order; ``type`` is the base letter ``'M'`` or ``'A'`` and
          ``sequence`` is ``None`` for rows seen before any sequence header.
        - ``sequences``: ``{sequence_number: [{'episode', 'type', 'intensity'}]}``.
        - ``action_beats``: ``{sequence_number: declared action beat count}``.
        - ``emotional_beats``: episode numbers whose table row contains 💔.
    """
    if not os.path.exists(filepath):
        return None, f"File not found: {filepath}"

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Episode rows in tables:
    # | Ep | One-Line | Cliffhanger | Type | Intensity |
    # Supports both old format (M, A) and detailed codes:
    #   M-PT, M-CF, M-PU, M-CH, M-CT (Mid-Action)
    #   A-RE, A-CO, A-PS, A-SI, A-CT, A-DE (Aftermath)
    # The type cell may be wrapped in markdown bold markers.
    episode_pattern = re.compile(
        r'\|\s*(\d+)\s*\|[^|]+\|[^|]+\|\s*\*?\*?(M(?:-[A-Z]{2})?|A(?:-[A-Z]{2})?)\*?\*?\s*\|\s*(\d+(?:\.\d+)?)\s*\|',
        re.IGNORECASE
    )
    sequence_pattern = re.compile(r'#\s*SEQUENCE\s*(\d+)', re.IGNORECASE)
    action_pattern = re.compile(r'Action Beats?:\s*(\d+)', re.IGNORECASE)

    episodes = []
    sequences = defaultdict(list)
    sequence_action_beats = {}
    emotional_beat_episodes = []
    current_sequence = None

    for line in content.split('\n'):
        seq_match = sequence_pattern.search(line)
        if seq_match:
            current_sequence = int(seq_match.group(1))

        # Compare against None rather than relying on truthiness so that a
        # section numbered 0 is tracked like any other sequence.
        in_sequence = current_sequence is not None

        action_match = action_pattern.search(line)
        if action_match and in_sequence:
            sequence_action_beats[current_sequence] = int(action_match.group(1))

        ep_match = episode_pattern.search(line)
        if not ep_match:
            continue

        ep_num = int(ep_match.group(1))
        # Normalize detailed codes (M-CF, A-RE) to the base type (M, A).
        cliff_type = ep_match.group(2).upper()[0]
        intensity = float(ep_match.group(3))

        episodes.append({
            'episode': ep_num,
            'type': cliff_type,
            'intensity': intensity,
            'sequence': current_sequence
        })

        if in_sequence:
            sequences[current_sequence].append({
                'episode': ep_num,
                'type': cliff_type,
                'intensity': intensity
            })

        # An emotional beat is flagged by a 💔 anywhere in the episode row.
        if '💔' in line:
            emotional_beat_episodes.append(ep_num)

    return {
        'episodes': episodes,
        'sequences': dict(sequences),
        'action_beats': sequence_action_beats,
        'emotional_beats': emotional_beat_episodes
    }, None


def parse_plant_payoff(filepath):
    """Read plant_payoff_plan.md and summarise its threads and echo moments.

    A thread is a ``## Thread N: <name>`` heading followed (after one or more
    newlines) by a ``**Type:** <word>`` line. An echo moment is any
    ``## Echo N`` heading.

    Args:
        filepath: Path (str or Path) to plant_payoff_plan.md.

    Returns:
        ``(data, None)`` on success, or ``(None, error_message)`` when the
        file does not exist. ``data`` holds ``threads`` (list of
        ``{'name', 'type'}`` with the type lower-cased), ``thread_count``,
        ``thread_types`` (type -> count) and ``echo_count``.
    """
    if not os.path.exists(filepath):
        return None, f"File not found: {filepath}"

    with open(filepath, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # ## Thread 1: "EXPENDABLE" → "ESSENTIAL"
    # **Type:** Phrase
    thread_header = re.compile(
        r'##\s*Thread\s*\d+[:\s]+([^\n]+)\n+\*\*Type:\*\*\s*(\w+)',
        re.IGNORECASE
    )
    echo_header = re.compile(r'##\s*Echo\s*\d+', re.IGNORECASE)

    threads = [
        {'name': hit.group(1).strip(), 'type': hit.group(2).strip().lower()}
        for hit in thread_header.finditer(text)
    ]

    # Tally how many threads fall under each type.
    type_tally = defaultdict(int)
    for thread in threads:
        type_tally[thread['type']] += 1

    summary = {
        'threads': threads,
        'thread_count': len(threads),
        'thread_types': dict(type_tally),
        'echo_count': sum(1 for _ in echo_header.finditer(text)),
    }
    return summary, None


def check_consecutive_types(episodes, *, max_allowed=3):
    """Find runs of consecutive episodes sharing the same cliffhanger type.

    Episodes are ordered by their ``'episode'`` number and grouped into runs
    of equal ``'type'``. Adjacency is positional in that ordering; gaps in
    the episode numbering do not break a run.

    Args:
        episodes: Iterable of dicts with at least ``'episode'`` and ``'type'``.
        max_allowed: Longest run that is still acceptable. Runs strictly
            longer than this are reported as violations. Defaults to 3.

    Returns:
        ``(max_consecutive, violations)`` where ``max_consecutive`` is the
        length of the longest run (0 for empty input) and ``violations`` is a
        list of ``{'type', 'start_ep', 'end_ep', 'count'}`` dicts in episode
        order.
    """
    if not episodes:
        return 0, []

    ordered = sorted(episodes, key=lambda ep: ep['episode'])

    # Build one record per run instead of tracking counters and back-indexing.
    runs = []
    for ep in ordered:
        if runs and runs[-1]['type'] == ep['type']:
            runs[-1]['end_ep'] = ep['episode']
            runs[-1]['count'] += 1
        else:
            runs.append({
                'type': ep['type'],
                'start_ep': ep['episode'],
                'end_ep': ep['episode'],
                'count': 1
            })

    max_consecutive = max(run['count'] for run in runs)
    violations = [run for run in runs if run['count'] > max_allowed]
    return max_consecutive, violations


# ---------------------------------------------------------------------------
# Genre Obligation Audit (Structural Gate S6 — Snyder)
# Checks that obligatory scenes for the project's genre are present.
# ---------------------------------------------------------------------------

# Genre-specific obligatory scenes (derived from Snyder's genre tables).
# Maps a snake_case genre key to a list of (scene_key, description) tuples;
# every genre below lists five scenes.
#   - detect_project_genre() looks for the genre key, with underscores
#     replaced by spaces, inside the project's documents.
#   - check_genre_obligations() derives its search keywords from the
#     description text, so rewording a description changes what is matched.
GENRE_OBLIGATIONS = {
    'monster_in_the_house': [
        ('sin_committed', 'A sin or transgression that invites the monster'),
        ('monster_revealed', 'The monster/threat is fully revealed'),
        ('team_assembled', 'The team or group forms to fight the monster'),
        ('half_man', 'A team member reveals they are compromised/traitor'),
        ('final_confrontation', 'Direct confrontation with the monster'),
    ],
    'golden_fleece': [
        ('road_apple', 'An obstacle that forces a detour or rethinking'),
        ('team_bonding', 'The team grows closer through shared hardship'),
        ('midpoint_prize', 'A false victory or partial achievement of the goal'),
        ('road_wisdom', 'The real lesson is about the journey, not the destination'),
        ('final_test', 'The hero proves transformation at the goal'),
    ],
    'dude_with_a_problem': [
        ('innocent_hero', 'An ordinary person thrust into extraordinary circumstances'),
        ('sudden_event', 'The inciting event that shatters normal life'),
        ('life_or_death', 'Stakes escalate to life-or-death'),
        ('surrender_option', 'The hero could walk away but chooses not to'),
        ('triumph_through_courage', 'Victory through personal courage, not skill'),
    ],
    'rites_of_passage': [
        ('life_problem', 'A universal life problem the hero must face'),
        ('wrong_way', 'The hero tries the wrong solution first'),
        ('acceptance', 'The hero accepts the truth of their situation'),
        ('new_understanding', 'A fundamental shift in worldview'),
        ('growth_demonstrated', 'The transformation is visible in action'),
    ],
    'buddy_love': [
        ('incomplete_hero', 'The hero is missing something they need'),
        ('counterpart', 'The counterpart fills what the hero lacks'),
        ('complication', 'An external force threatens the relationship'),
        ('black_moment', 'The relationship appears to be destroyed'),
        ('proof_of_love', 'Sacrifice proves the relationship is real'),
    ],
    'whydunit': [
        ('detective_flaw', 'The investigator has a personal flaw that connects to the case'),
        ('dark_turn', 'The investigation reveals something darker than expected'),
        ('secret_exposed', 'A hidden truth is unearthed'),
        ('detective_complicit', 'The investigator realizes they are part of the problem'),
        ('justice_redefined', 'Justice is achieved, but not as expected'),
    ],
    'institutionalized': [
        ('group_entry', 'The hero enters the institution/group'),
        ('brando_figure', 'A charismatic leader or system that demands loyalty'),
        ('company_man', 'A loyalist who embodies the institution\'s values'),
        ('choice_moment', 'The hero must choose: conform or rebel'),
        ('sacrifice_or_escape', 'The hero burns it down or escapes (or both)'),
    ],
    'superhero': [
        ('power_discovery', 'The hero discovers their special ability'),
        ('nemesis_connection', 'The nemesis has a personal connection to the hero'),
        ('curse_of_power', 'The power comes with a personal cost'),
        ('people_in_danger', 'Innocent people are threatened by the conflict'),
        ('self_sacrifice', 'The hero risks everything to save others'),
    ],
    'out_of_the_bottle': [
        ('wish_granted', 'A magical or transformative event occurs'),
        ('fun_and_games', 'The hero enjoys the new reality'),
        ('consequences', 'The transformation has unexpected negative effects'),
        ('lesson_learned', 'The hero realizes what they truly need'),
        ('return_to_normal', 'The world resets, but the hero has changed'),
    ],
    'thriller': [
        ('ticking_clock', 'An explicit time pressure or deadline'),
        ('false_ally', 'Someone the hero trusts is working against them'),
        ('trap_sprung', 'The hero is cornered or captured'),
        ('impossible_odds', 'The situation seems unwinnable'),
        ('reversal', 'The hero turns the tables using something planted earlier'),
    ],
}

# Fallback genre for projects without explicit genre assignment.
# Must be a key of GENRE_OBLIGATIONS; detect_project_genre() returns it when
# no genre phrase is found in any project document.
DEFAULT_GENRE = 'dude_with_a_problem'


def detect_project_genre(project_path):
    """Detect the project's genre from ORCHESTRATION.md or series_bible.md.

    Documents are checked in this order, and the first one that mentions a
    genre wins:

    1. ``<project>/ORCHESTRATION.md``, or ``<project>/../ORCHESTRATION.md``
       when the former does not exist (development projects).
    2. ``<project>/bible/series_bible.md``
    3. ``<project>/series_bible.md``

    A genre is "mentioned" when its key with underscores replaced by spaces
    occurs anywhere in the lower-cased text. Genres are tried in
    ``GENRE_OBLIGATIONS`` order, so when several genre phrases appear in one
    file the earliest key in that table is returned.

    Args:
        project_path: Project directory (str or Path).

    Returns:
        ``(genre_key, genre_source)``. ``genre_source`` names the file and
        the matched phrase, e.g. ``'ORCHESTRATION.md (buddy love)'``. When
        nothing matches, returns
        ``(DEFAULT_GENRE, 'default (no genre specified)')``.
    """
    project_path = Path(project_path)

    def _match_genre(path):
        """Return (genre_key, phrase) for the first genre named in *path*, else None."""
        # Read as UTF-8 explicitly, like the other parsers in this file; the
        # locale default can fail or mis-decode on non-UTF-8 platforms.
        text = path.read_text(encoding='utf-8').lower()
        for genre_key in GENRE_OBLIGATIONS:
            phrase = genre_key.replace('_', ' ')
            if phrase in text:
                return genre_key, phrase
        return None

    # Check ORCHESTRATION.md first (production projects), then fall back to
    # the parent directory for development projects.
    orch_path = project_path / 'ORCHESTRATION.md'
    if not orch_path.exists():
        orch_path = project_path.parent / 'ORCHESTRATION.md'

    candidates = [
        (orch_path, 'ORCHESTRATION.md'),
        (project_path / 'bible' / 'series_bible.md', 'series_bible.md'),
        (project_path / 'series_bible.md', 'series_bible.md'),
    ]

    for path, label in candidates:
        if not path.exists():
            continue
        hit = _match_genre(path)
        if hit is not None:
            genre_key, phrase = hit
            return genre_key, f'{label} ({phrase})'

    return DEFAULT_GENRE, 'default (no genre specified)'


def check_genre_obligations(project_path):
    """Check that the genre's obligatory scenes appear in the project documents.

    The genre comes from ``detect_project_genre``. The text of every existing
    file among ``structure_outline.md``, ``treatment.md``,
    ``bible/episode_arc.md`` (all under the project directory) and
    ``../treatment.md`` is lower-cased and concatenated. For each obligation,
    keywords are taken from its description (lower-case words of four or more
    letters, minus a small stop-word list). The scene counts as present when
    at least a third of its keywords (minimum one) occur in the text.

    This is a heuristic: keywords are matched as substrings, not whole words.

    Args:
        project_path: Project directory (str or Path).

    Returns:
        ``(passed, results)``. ``results`` always contains ``genre``,
        ``source``, ``obligations``, ``found`` and ``missing``; it also has a
        ``note`` when no content files exist. ``passed`` is True when nothing
        is missing, and also when there was nothing to check (no obligations
        for the genre, or no content files).
    """
    project_path = Path(project_path)
    genre_key, genre_source = detect_project_genre(project_path)
    obligations = GENRE_OBLIGATIONS.get(genre_key, [])

    if not obligations:
        return True, {'genre': genre_key, 'source': genre_source,
                      'obligations': [], 'found': [], 'missing': []}

    # Load content to search (structure outline, treatment, episode arc).
    candidates = [
        project_path / 'structure_outline.md',
        project_path / 'treatment.md',
        project_path / 'bible' / 'episode_arc.md',
        project_path.parent / 'treatment.md',  # production project
    ]
    # Read as UTF-8 explicitly, like the other parsers in this file, instead
    # of relying on the platform's locale encoding.
    search_content = ''.join(
        '\n' + candidate.read_text(encoding='utf-8').lower()
        for candidate in candidates
        if candidate.exists()
    )

    if not search_content:
        return True, {'genre': genre_key, 'source': genre_source,
                      'obligations': obligations, 'found': [], 'missing': [],
                      'note': 'No content files found to check against'}

    # Common words that carry no scene-specific meaning.
    stop_words = frozenset((
        'that', 'with', 'from', 'into', 'they', 'their',
        'this', 'have', 'been', 'some', 'about', 'must',
    ))
    word_pattern = re.compile(r'\b[a-z]{4,}\b')

    found = []
    missing = []

    for scene_key, scene_desc in obligations:
        key_words = [w for w in word_pattern.findall(scene_desc.lower())
                     if w not in stop_words]

        # Require at least 1/3 of the key words (and never fewer than one).
        matches = sum(1 for w in key_words if w in search_content)
        threshold = max(1, len(key_words) // 3)

        bucket = found if matches >= threshold else missing
        bucket.append((scene_key, scene_desc))

    results = {
        'genre': genre_key,
        'source': genre_source,
        'obligations': obligations,
        'found': found,
        'missing': missing,
    }

    return not missing, results


# ---------------------------------------------------------------------------
# Semantic Escalation Tracker (Scaffolding Gate G4 — McKee)
# Tags each sequence climax into 5 Tactic Buckets. Saves to escalation_log.json.
# This is a LOG-ONLY gate — no fail, just tracking for Script Doctor Phase 5.
# ---------------------------------------------------------------------------

# Canonical lower-case identifiers of the five tactic buckets.
# track_semantic_escalation() lower-cases the model's answer and accepts it
# only if it is one of these; _keyword_classify() returns one of the same keys.
TACTIC_BUCKETS = [
    'violence_force',
    'stealth_deception',
    'negotiation_bargaining',
    'technical_hack',
    'self_sacrifice',
]

# Prompt template for the LLM classifier. It has a single placeholder,
# {climax_text}, filled via str.format() in track_semantic_escalation().
# The model is asked to answer with one "BUCKET: <NAME>" line, which is the
# field the caller parses.
_ESCALATION_PROMPT = """You are classifying the climactic resolution of a story sequence.

The 5 Tactic Buckets are:
1. VIOLENCE_FORCE — Physical confrontation, fighting, destruction
2. STEALTH_DECEPTION — Trickery, disguise, misdirection, hiding
3. NEGOTIATION_BARGAINING — Deals, persuasion, compromise, diplomacy
4. TECHNICAL_HACK — Using knowledge, technology, puzzle-solving, engineering
5. SELF_SACRIFICE — Giving up something personal to achieve the goal

Here is the sequence climax description:

<climax>
{climax_text}
</climax>

Which Tactic Bucket best describes HOW the protagonist resolves this sequence?

Output EXACTLY one line:
BUCKET: [one of: VIOLENCE_FORCE, STEALTH_DECEPTION, NEGOTIATION_BARGAINING, TECHNICAL_HACK, SELF_SACRIFICE]"""


def track_semantic_escalation(project_path):
    """Tag each sequence climax with a tactic bucket and persist the result.

    ``structure_outline.md`` is split into sequences at ``# SEQUENCE N``
    headers. The last 500 characters of each sequence are treated as its
    climax and sent to ``llm_gate_call`` for classification. Whenever the
    call returns nothing, or its answer cannot be resolved to one of
    ``TACTIC_BUCKETS``, the bucket comes from ``_keyword_classify`` instead.

    Side effect: when at least one sequence is found, the log is written to
    ``<project_path>/state/escalation_log.json`` (the directory is created
    if needed).

    Args:
        project_path: Project directory (str or Path).

    Returns:
        ``{str(sequence_number): bucket_key}``; an empty dict when the
        outline file is missing or contains no sequence with body text.
    """
    project_path = Path(project_path)

    # Load structure outline to get sequence climaxes.
    structure_path = project_path / 'structure_outline.md'
    if not structure_path.exists():
        return {}

    # Explicit UTF-8, matching parse_structure_outline's read of this file.
    content = structure_path.read_text(encoding='utf-8')

    header_pattern = re.compile(r'#\s*SEQUENCE\s*(\d+)', re.IGNORECASE)
    sequences = {}
    current_seq = None
    seq_lines = []

    for line in content.split('\n'):
        header = header_pattern.search(line)
        if header:
            # Close the previous sequence before starting the next one.
            if current_seq is not None and seq_lines:
                sequences[current_seq] = '\n'.join(seq_lines)
            current_seq = int(header.group(1))
            seq_lines = []
        elif current_seq is not None:
            seq_lines.append(line)

    if current_seq is not None and seq_lines:
        sequences[current_seq] = '\n'.join(seq_lines)

    if not sequences:
        return {}

    log = {}
    for seq_num, seq_text in sorted(sequences.items()):
        # Last ~500 chars as climax approximation (shorter text is kept whole).
        climax_text = seq_text[-500:]

        prompt = _ESCALATION_PROMPT.format(climax_text=climax_text)
        result = llm_gate_call(prompt, model=ANTHROPIC_HAIKU, max_tokens=50)

        bucket = None
        if result:
            try:
                if parse_llm_field:
                    raw = parse_llm_field(result, "BUCKET")
                elif "BUCKET:" in result:
                    raw = result.split("BUCKET:")[-1]
                else:
                    raw = None

                if raw:
                    # Keep only the first line of the answer and tolerate the
                    # square brackets shown in the prompt's output template.
                    answer_lines = raw.strip().splitlines()
                    candidate = answer_lines[0].strip(' \t[]').lower() if answer_lines else ''
                    if candidate in TACTIC_BUCKETS:
                        bucket = candidate
            except Exception:
                # Deliberate best-effort: this gate is log-only, so any
                # parsing problem simply falls through to keyword matching.
                bucket = None

        log[str(seq_num)] = bucket or _keyword_classify(climax_text)

    # Save to escalation_log.json
    log_path = project_path / 'state' / 'escalation_log.json'
    log_path.parent.mkdir(parents=True, exist_ok=True)
    log_path.write_text(json.dumps(log, indent=2), encoding='utf-8')

    return log


def _keyword_classify(text):
    """Fallback keyword-based tactic bucket classification."""
    text_lower = text.lower()
    scores = {
        'violence_force': sum(1 for w in ['fight', 'attack', 'shoot', 'kill', 'punch', 'weapon', 'blood', 'battle', 'strike', 'force'] if w in text_lower),
        'stealth_deception': sum(1 for w in ['trick', 'deceive', 'hide', 'sneak', 'disguise', 'lie', 'pretend', 'secret', 'cover', 'plant'] if w in text_lower),
        'negotiation_bargaining': sum(1 for w in ['deal', 'negotiate', 'bargain', 'agree', 'offer', 'terms', 'compromise', 'persuade', 'convince', 'alliance'] if w in text_lower),
        'technical_hack': sum(1 for w in ['hack', 'code', 'system', 'override', 'solve', 'puzzle', 'engineer', 'build', 'repair', 'calculate'] if w in text_lower),
        'self_sacrifice': sum(1 for w in ['sacrifice', 'give up', 'surrender', 'trade', 'cost', 'price', 'abandon', 'let go', 'accept', 'willing to die'] if w in text_lower),
    }
    if max(scores.values()) == 0:
        return 'violence_force'  # Default
    return max(scores, key=scores.get)


def validate_arc(project_path):
    """Run the standard-mode arc validation for a project directory.

    Parses ``structure_outline.md`` and ``plant_payoff_plan.md`` from
    *project_path* and evaluates the Tier 1 hard gates:

    1. overall cliffhanger intensity average >= 9.0
    2. every sequence's intensity average >= 8.5
    3. >= 20 declared action beats in total and >= 1 in every sequence
    4. an emotional beat within +/-2 episodes of each designated episode
    5. no run of more than 3 consecutive same-type cliffhangers
    6. >= 6 plant/payoff threads, with at least one object, phrase and image
       thread and at least one echo moment
    7. genre obligatory scenes present

    Afterwards the log-only semantic escalation tracker runs; it never fails
    validation, but it writes ``state/escalation_log.json`` under the project
    directory when sequences are found.

    Args:
        project_path: Project directory (str or Path).

    Returns:
        Dict with ``tier1`` (one entry per gate plus ``all_passed``),
        ``tier2`` (always empty here), ``passed``, ``errors`` and, when
        parsing succeeded, ``tracking``. If either input file is missing the
        function returns early with ``passed`` False, the message in
        ``errors`` and an empty ``tier1``.
    """
    project_path = Path(project_path)

    results = {
        'tier1': {},
        'tier2': {},
        'passed': True,
        'errors': []
    }

    # Parse files; a missing file aborts validation with an error entry.
    structure_data, err = parse_structure_outline(project_path / 'structure_outline.md')
    if err:
        results['errors'].append(err)
        results['passed'] = False
        return results

    plant_data, err = parse_plant_payoff(project_path / 'plant_payoff_plan.md')
    if err:
        results['errors'].append(err)
        results['passed'] = False
        return results

    episodes = structure_data['episodes']
    sequences = structure_data['sequences']
    tier1 = results['tier1']

    # === TIER 1: HARD GATES ===

    # 1. Cliffhanger Intensity - Overall
    if episodes:
        overall_intensity = sum(ep['intensity'] for ep in episodes) / len(episodes)
    else:
        overall_intensity = 0

    tier1['overall_intensity'] = {
        'value': round(overall_intensity, 2),
        'required': 9.0,
        'passed': overall_intensity >= 9.0
    }

    # 2. Cliffhanger Intensity - Per Sequence
    averages = {
        seq_num: sum(ep['intensity'] for ep in seq_eps) / len(seq_eps)
        for seq_num, seq_eps in sequences.items()
        if seq_eps
    }
    sequence_intensities = {
        seq_num: {'value': round(avg, 2), 'passed': avg >= 8.5}
        for seq_num, avg in averages.items()
    }
    if averages:
        # min() keeps the first sequence on ties and also reports a lowest
        # sequence when it is numbered 0 or when every average is 10.
        lowest_sequence = min(averages, key=averages.get)
        lowest = {'sequence': lowest_sequence,
                  'value': round(averages[lowest_sequence], 2)}
    else:
        lowest = None

    tier1['sequence_intensities'] = {
        'sequences': sequence_intensities,
        'required': 8.5,
        'passed': all(entry['passed'] for entry in sequence_intensities.values()),
        'lowest': lowest
    }

    # 3. Action Density
    action_beats = structure_data['action_beats']
    total_action = sum(action_beats.values())
    # A sequence that has episodes but never declared an "Action Beats:"
    # line counts as having zero beats, so it cannot slip past the
    # per-sequence requirement.
    known_sequences = set(sequences) | set(action_beats)
    all_have_action = all(action_beats.get(seq_num, 0) >= 1 for seq_num in known_sequences)

    tier1['action_density'] = {
        'total': total_action,
        'required': 20,
        'passed': total_action >= 20 and all_have_action,
        'per_sequence': action_beats,
        'all_sequences_have_action': all_have_action
    }

    # 4. Emotional Beats (each designated episode satisfied within ±2)
    required_beats = [10, 15, 20, 26, 30, 32, 36, 42, 45, 50, 59]
    found_beats = structure_data['emotional_beats']
    missing_beats = [
        req for req in required_beats
        if not any(abs(found - req) <= 2 for found in found_beats)
    ]

    tier1['emotional_beats'] = {
        'found': len(found_beats),
        'required': 11,
        'passed': not missing_beats,
        'missing': missing_beats
    }

    # 5. Cliffhanger Type Distribution
    max_consecutive, violations = check_consecutive_types(episodes)

    tier1['type_distribution'] = {
        'max_consecutive': max_consecutive,
        'required': 3,
        'passed': max_consecutive <= 3,
        'violations': violations
    }

    # 6. Plant/Payoff Threads
    thread_count = plant_data['thread_count']
    thread_types = plant_data['thread_types']
    echo_count = plant_data['echo_count']

    has_object = thread_types.get('object', 0) >= 1
    has_phrase = thread_types.get('phrase', 0) >= 1
    has_image = thread_types.get('image', 0) >= 1
    has_echo = echo_count >= 1

    tier1['plant_payoff'] = {
        'thread_count': thread_count,
        'required_threads': 6,
        'thread_types': thread_types,
        'has_object': has_object,
        'has_phrase': has_phrase,
        'has_image': has_image,
        'echo_count': echo_count,
        'has_echo': has_echo,
        'passed': thread_count >= 6 and has_object and has_phrase and has_image and has_echo
    }

    # 7. Genre Obligation Audit (Structural Gate S6)
    genre_passed, genre_results = check_genre_obligations(project_path)
    tier1['genre_obligations'] = {
        'genre': genre_results['genre'],
        'source': genre_results['source'],
        'total': len(genre_results.get('obligations', [])),
        'found': len(genre_results.get('found', [])),
        'missing': genre_results.get('missing', []),
        'passed': genre_passed,
    }

    # Overall pass/fail: every gate must pass. Computed before 'all_passed'
    # is stored so that key is not mistaken for a gate.
    gate_names = (
        'overall_intensity',
        'sequence_intensities',
        'action_density',
        'emotional_beats',
        'type_distribution',
        'plant_payoff',
        'genre_obligations',
    )
    tier1_passed = all(tier1[name]['passed'] for name in gate_names)

    tier1['all_passed'] = tier1_passed
    results['passed'] = tier1_passed

    # === TRACKING (Log-only, no fail) ===

    # Semantic Escalation Tracker (G4)
    escalation_log = track_semantic_escalation(project_path)
    results['tracking'] = {
        'escalation_log': escalation_log,
    }

    # Longest run of consecutive sequences resolved with the same tactic.
    if escalation_log:
        buckets = [escalation_log.get(str(num))
                   for num in sorted(int(key) for key in escalation_log)]
        consecutive_same = 1
        max_consecutive_same = 1
        for previous, current in zip(buckets, buckets[1:]):
            if current == previous:
                consecutive_same += 1
                max_consecutive_same = max(max_consecutive_same, consecutive_same)
            else:
                consecutive_same = 1
        results['tracking']['max_consecutive_same_bucket'] = max_consecutive_same
        results['tracking']['has_repetition_warning'] = max_consecutive_same >= 3

    return results


def print_report(results, project_name):
    """Print the formatted validation report for a ``validate_arc`` result.

    Prints each Tier 1 gate with a coloured pass/fail mark, a list of
    required fixes when validation failed, and the log-only semantic
    escalation tracking section when an escalation log is present.

    If *results* carries no gate data (``validate_arc`` returns early with
    only ``errors`` when an input file is missing), the recorded errors are
    printed and the report ends with a FAIL result instead of raising
    ``KeyError``.

    Args:
        results: Dict produced by ``validate_arc``.
        project_name: Name shown (upper-cased) in the report header.

    Returns:
        ``results['passed']``; False when no gate data was available.
    """
    def mark(ok):
        """Return the coloured check or cross for a gate outcome."""
        return f"{GREEN}✓{RESET}" if ok else f"{RED}✗{RESET}"

    heavy_rule = '═' * 65
    light_rule = '─' * 65

    print(f"\n{heavy_rule}")
    print(f"{BOLD}ARC VALIDATION GATE: {project_name.upper()}{RESET}")
    print(f"{heavy_rule}\n")

    tier1 = results.get('tier1') or {}
    if 'overall_intensity' not in tier1:
        # No gates were evaluated: show why, then report failure.
        for err in results.get('errors', []):
            print(f"{RED}ERROR: {err}{RESET}")
        print(f"\n{heavy_rule}")
        print(f"{BOLD}{RED}TIER 1 RESULT: FAIL{RESET}")
        print(f"{heavy_rule}\n")
        return False

    print(f"{BOLD}TIER 1: HARD GATES{RESET}")
    print(f"{light_rule}\n")

    # Overall Intensity
    oi = tier1['overall_intensity']
    print("CLIFFHANGER INTENSITY")
    print(f"  Overall Average:     {oi['value']}/10 {mark(oi['passed'])} (≥{oi['required']})")

    # Per-Sequence
    si = tier1['sequence_intensities']
    print("\n  Per-Sequence:")
    for seq_num in sorted(si['sequences'].keys()):
        seq = si['sequences'][seq_num]
        print(f"    SEQ {seq_num}: {seq['value']}/10 {mark(seq['passed'])}")

    # Action Density
    ad = tier1['action_density']
    print("\nACTION DENSITY")
    print(f"  Total Action Beats:  {ad['total']}/{ad['required']} minimum {mark(ad['passed'])}")
    print(f"  All sequences ≥1:    {mark(ad['all_sequences_have_action'])}")

    # Emotional Beats
    eb = tier1['emotional_beats']
    print("\nEMOTIONAL BEATS")
    print(f"  Scheduled:           {eb['found']}/{eb['required']} {mark(eb['passed'])}")
    if eb['missing']:
        print(f"  {RED}Missing at:          Ep {', '.join(map(str, eb['missing']))}{RESET}")

    # Type Distribution
    td = tier1['type_distribution']
    print("\nCLIFFHANGER DISTRIBUTION")
    print(f"  Max consecutive:     {td['max_consecutive']} (≤{td['required']}) {mark(td['passed'])}")
    for v in td['violations']:
        print(f"  {RED}Violation: {v['count']}x {v['type']} from Ep {v['start_ep']}-{v['end_ep']}{RESET}")

    # Plant/Payoff
    pp = tier1['plant_payoff']
    print("\nPLANT/PAYOFF THREADS")
    print(f"  Total threads:       {pp['thread_count']} (≥{pp['required_threads']}) {mark(pp['passed'])}")
    print(f"  Object threads:      {pp['thread_types'].get('object', 0)} {mark(pp['has_object'])}")
    print(f"  Phrase threads:      {pp['thread_types'].get('phrase', 0)} {mark(pp['has_phrase'])}")
    print(f"  Image threads:       {pp['thread_types'].get('image', 0)} {mark(pp['has_image'])}")
    print(f"  Echo moments:        {pp['echo_count']} {mark(pp['has_echo'])}")

    # Genre Obligations (the section is skipped when the entry is absent)
    go = tier1.get('genre_obligations', {})
    if go:
        genre_display = go.get('genre', 'unknown').replace('_', ' ').title()
        print("\nGENRE OBLIGATIONS (S6)")
        print(f"  Genre:               {genre_display} (via {go.get('source', '?')})")
        print(f"  Obligatory scenes:   {go.get('found', 0)}/{go.get('total', 0)} {mark(go.get('passed', True))}")
        if go.get('missing'):
            for scene_key, scene_desc in go['missing']:
                print(f"  {RED}Missing: {scene_desc}{RESET}")

    # Final Result
    print(f"\n{heavy_rule}")
    if results['passed']:
        print(f"{BOLD}{GREEN}TIER 1 RESULT: PASS{RESET}")
    else:
        print(f"{BOLD}{RED}TIER 1 RESULT: FAIL{RESET}")
        print(f"\n{YELLOW}FIX REQUIRED:{RESET}")
        if not tier1['overall_intensity']['passed']:
            print("  - Raise overall cliffhanger intensity to ≥9.0")
        if not tier1['sequence_intensities']['passed']:
            print("  - Raise all sequence averages to ≥8.5")
        if not tier1['action_density']['passed']:
            print("  - Add more action beats (need ≥20 total, ≥1 per sequence)")
        if not tier1['emotional_beats']['passed']:
            print(f"  - Add emotional beats at episodes: {', '.join(map(str, tier1['emotional_beats']['missing']))}")
        if not tier1['type_distribution']['passed']:
            print("  - Break up consecutive same-type cliffhangers")
        if go and not go.get('passed', True):
            print("  - Add missing genre obligatory scenes to treatment")
        if not tier1['plant_payoff']['passed']:
            print("  - Add more plant/payoff threads (need ≥6, all types)")

    # Tracking info (log-only, no fail)
    tracking = results.get('tracking', {})
    escalation = tracking.get('escalation_log', {})
    if escalation:
        print(f"\n{BOLD}TRACKING: SEMANTIC ESCALATION (G4){RESET}")
        print(f"{light_rule}")
        bucket_labels = {
            'violence_force': 'Violence/Force',
            'stealth_deception': 'Stealth/Deception',
            'negotiation_bargaining': 'Negotiation',
            'technical_hack': 'Technical Hack',
            'self_sacrifice': 'Self-Sacrifice',
        }
        # Log keys are stringified sequence numbers; sort them numerically.
        for seq_num in sorted(int(k) for k in escalation):
            bucket = escalation[str(seq_num)]
            label = bucket_labels.get(bucket, bucket)
            print(f"  SEQ {seq_num}: {label}")

        if tracking.get('has_repetition_warning'):
            max_rep = tracking.get('max_consecutive_same_bucket', 0)
            print(f"\n  {YELLOW}⚠ {max_rep} consecutive sequences use the same tactic bucket.{RESET}")
            print(f"  {YELLOW}  Script Doctor should force a category shift in rewrites.{RESET}")

        print("\n  Saved: state/escalation_log.json")

    print(f"{heavy_rule}\n")

    return results['passed']


# ============================================================================
# TREATMENT FORMAT VALIDATION
# ============================================================================

# Vague phrase patterns that indicate weak dramatic actions.
# Each entry is a regex fragment; trailing letters are often left open so
# that inflections match too (e.g. 'escalat' covers escalate/escalates/escalating).
VAGUE_PHRASES = [
    r'tensions?\s+rise',
    r'things?\s+escalat',
    r'relationship\s+deep',
    r'realizations?\s+occur',
    r'stakes?\s+increas',
    r'something\s+chang',
    r'learns?\s+that',
    r'discovers?\s+that(?!\s+\w+\s+(is|was|has|had|are|were))',  # Allow "discovers that X is Y"
    r'things?\s+get\s+(complicated|worse|better)',
    r'situation\s+(develops|evolves|changes)',
    r'conflict\s+(grows|develops|intensifies)',
]

# All fragments combined into one case-insensitive alternation.
# NOTE: several fragments contain capturing groups, so findall() on this
# pattern returns tuples of groups rather than the matched text; use
# search()/finditer() and match.group(0) to get the phrase itself.
VAGUE_PATTERN = re.compile('|'.join(VAGUE_PHRASES), re.IGNORECASE)

# Valid beat types (upper-case labels; presumably compared against the
# "Beat:" metadata of each treatment episode — verify against the caller).
VALID_BEAT_TYPES = ['SETUP', 'CATALYST', 'LOCK-IN', 'COMPLICATION', 'COLLISION', 'CRISIS', 'REVELATION', 'CLIMAX', 'RESOLUTION']

# Valid hook types (the same two values parse_treatment_arc's hook pattern accepts)
VALID_HOOK_TYPES = ['SILENT', 'DIALOGUE']

# Valid cliffhanger types (the same two values parse_treatment_arc's cliffhanger pattern accepts)
VALID_CLIFFHANGER_TYPES = ['MID-ACTION', 'AFTERMATH']


def parse_treatment_arc(filepath):
    """Parse treatment-format episode_arc.md and extract episode data.

    Supports the enriched format with:
    - **Beat:** X
    - **Hook:** Y | **Cliffhanger:** Z
    - **Threads:** [PLANT: X]
    - Rich content (Event paragraph, Cass, Hera, Emotional beat, Relationship)

    Legacy "- Dramatic action: ..." and "- Tension note: ..." lines are also
    recognised.

    Args:
        filepath: Location of the episode_arc.md file to read.

    Returns:
        ``(episodes, None)`` on success, where ``episodes`` holds one dict per
        ``### Episode N: "Title"`` header in file order, or ``(None, message)``
        when the file does not exist.
    """
    if not os.path.exists(filepath):
        return None, f"File not found: {filepath}"

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    episodes = []

    # Pattern to match treatment format episodes
    # ### Episode N: "Title"
    episode_header_pattern = re.compile(
        r'###\s*Episode\s*(\d+):\s*"([^"]+)"',
        re.IGNORECASE
    )

    # Pattern to match beat type (supports both formats)
    # - **Beat:** SETUP  (enriched format)
    # - Beat: SETUP      (simple format)
    beat_pattern = re.compile(
        r'-\s*\*?\*?Beat:?\*?\*?\s*(\w+[-\w]*)',
        re.IGNORECASE
    )

    # Pattern to match threads (supports both formats)
    # - **Threads:** [PLANT: X]  (enriched format)
    # - Threads: [PLANT: X]      (simple format)
    thread_pattern = re.compile(
        r'\[(PLANT|ADVANCE|PAYOFF|COLLISION):\s*([^\]]+)\]',
        re.IGNORECASE
    )

    # Pattern to match hook/cliffhanger (supports both formats)
    # - **Hook:** SILENT | **Cliffhanger:** MID-ACTION  (enriched format)
    # - Hook: SILENT | Cliffhanger: MID-ACTION          (simple format)
    hook_cliff_pattern = re.compile(
        r'-\s*\*?\*?Hook:?\*?\*?\s*(SILENT|DIALOGUE)\s*\|\s*\*?\*?Cliffhanger:?\*?\*?\s*(MID-ACTION|AFTERMATH)',
        re.IGNORECASE
    )

    # Patterns to match rich content fields (enriched format), keyed by the
    # episode flag each one sets when present.
    rich_field_patterns = {
        'has_cass': re.compile(r'\*\*Cass:\*\*\s*(.+?)(?=\n\*\*|\n---|\n###|$)', re.DOTALL | re.IGNORECASE),
        'has_hera': re.compile(r'\*\*Hera:\*\*\s*(.+?)(?=\n\*\*|\n---|\n###|$)', re.DOTALL | re.IGNORECASE),
        'has_emotional_beat': re.compile(r'\*\*Emotional\s+beat:\*\*\s*(.+?)(?=\n\*\*|\n---|\n###|$)', re.DOTALL | re.IGNORECASE),
        'has_relationship': re.compile(r'\*\*Relationship:\*\*\s*(.+?)(?=\n\*\*|\n---|\n###|$)', re.DOTALL | re.IGNORECASE),
    }

    # Fallback: Pattern to match dramatic action line (legacy format)
    action_pattern = re.compile(
        r'-\s*Dramatic\s*action:\s*(.+?)(?:\n|$)',
        re.IGNORECASE
    )

    # Pattern to match tension note
    tension_pattern = re.compile(
        r'-\s*Tension\s*note:\s*(.+?)(?:\n|$)',
        re.IGNORECASE
    )

    # Split content into episode blocks. The split is case-insensitive to
    # agree with episode_header_pattern; otherwise a "### episode N" header
    # would stay glued to the previous block and that episode would be lost.
    episode_blocks = re.split(r'(?=###\s*Episode\s*\d+)', content, flags=re.IGNORECASE)

    for block in episode_blocks:
        header_match = episode_header_pattern.search(block)
        if not header_match:
            # Preamble or a header without a quoted title: nothing to record.
            continue

        episode_data = {
            'episode': int(header_match.group(1)),
            'title': header_match.group(2),
            'beat_type': None,
            'threads': [],
            'hook_type': None,
            'cliffhanger_type': None,
            'has_rich_content': False,
            'has_cass': False,
            'has_hera': False,
            'has_emotional_beat': False,
            'has_relationship': False,
            'dramatic_action': None,  # Legacy field
            'tension_note': None,
            'is_treatment_format': False,
            'issues': []
        }

        # Extract beat type
        beat_match = beat_pattern.search(block)
        if beat_match:
            beat_type = beat_match.group(1).upper()
            episode_data['beat_type'] = beat_type
            if beat_type not in VALID_BEAT_TYPES:
                episode_data['issues'].append(f"Invalid beat type: {beat_type}")

        # Extract hook/cliffhanger; this line is what marks an episode as
        # treatment format.
        hc_match = hook_cliff_pattern.search(block)
        if hc_match:
            episode_data['hook_type'] = hc_match.group(1).upper()
            episode_data['cliffhanger_type'] = hc_match.group(2).upper()
            episode_data['is_treatment_format'] = True

        # Extract threads
        for thread_match in thread_pattern.finditer(block):
            episode_data['threads'].append({
                'type': thread_match.group(1).upper(),
                'name': thread_match.group(2).strip()
            })

        # Check for rich content (enriched format)
        for flag, pattern in rich_field_patterns.items():
            if pattern.search(block):
                episode_data[flag] = True
                episode_data['has_rich_content'] = True

        # Legacy: Check for dramatic action line
        action_match = action_pattern.search(block)
        if action_match:
            episode_data['dramatic_action'] = action_match.group(1).strip()
            # Collect the full matched phrases. group(0) is used instead of
            # findall() because findall() returns sub-group tuples whenever
            # the pattern contains capturing groups.
            vague_hits = [
                hit.group(0)
                for hit in VAGUE_PATTERN.finditer(episode_data['dramatic_action'])
            ]
            if vague_hits:
                episode_data['issues'].append(f"Vague phrase: {', '.join(vague_hits)}")

        # Extract tension note
        tension_match = tension_pattern.search(block)
        if tension_match:
            episode_data['tension_note'] = tension_match.group(1).strip()

        # Mark as treatment format if we have metadata
        if episode_data['beat_type'] and episode_data['hook_type']:
            episode_data['is_treatment_format'] = True

        episodes.append(episode_data)

    return episodes, None


def _find_long_runs(pairs, max_allowed=3):
    """Return every run of equal types that is longer than ``max_allowed``.

    Args:
        pairs: ``(episode_number, type)`` tuples, already in episode order.
        max_allowed: Longest run length that is still acceptable.

    Returns:
        A list of ``(type, [episode_numbers])`` tuples, one per offending run.
    """
    long_runs = []
    run_type = None
    run_eps = []
    for ep_num, item_type in pairs:
        if run_eps and item_type == run_type:
            run_eps.append(ep_num)
            continue
        # The type changed: close the previous run before starting a new one.
        if len(run_eps) > max_allowed:
            long_runs.append((run_type, run_eps))
        run_type, run_eps = item_type, [ep_num]
    if len(run_eps) > max_allowed:
        long_runs.append((run_type, run_eps))
    return long_runs


def validate_treatment(project_path):
    """Run treatment format validation.

    Looks for episode_arc.md under ``bible/``, ``development/`` and the
    project root (in that order), parses it and evaluates the hard gates
    (coverage, content, hook ratio, cliffhanger ratio, pattern variety) and
    the soft flags (vague actions, repeated beat types).

    Args:
        project_path: Project directory (string or Path).

    Returns:
        A dict with keys ``passed``, ``errors``, ``episodes``, ``hard_gates``
        and ``soft_flags``; ``arc_path`` is added when a file was parsed.
        When the file is missing or cannot be parsed, ``errors`` is non-empty
        and the gate dicts are empty.
    """
    project_path = Path(project_path)
    required_episodes = 60

    # Try multiple locations for episode_arc.md
    arc_paths = [
        project_path / 'bible' / 'episode_arc.md',
        project_path / 'development' / 'episode_arc.md',
        project_path / 'episode_arc.md',
    ]
    arc_path = next((path for path in arc_paths if path.exists()), None)

    if arc_path is None:
        return {
            'passed': False,
            'errors': ['episode_arc.md not found in bible/, development/, or project root'],
            'episodes': [],
            'hard_gates': {},
            'soft_flags': {}
        }

    episodes, err = parse_treatment_arc(arc_path)
    if err:
        return {
            'passed': False,
            'errors': [err],
            'episodes': [],
            'hard_gates': {},
            'soft_flags': {}
        }

    results = {
        'passed': True,
        'errors': [],
        'episodes': episodes,
        'hard_gates': {},
        'soft_flags': {},
        'arc_path': str(arc_path)
    }

    # === HARD GATES ===

    # 1. Coverage - all 60 episodes have treatment metadata.
    # The gate is decided by which episode numbers are actually present, so a
    # duplicated or out-of-range episode cannot mask a missing one.
    treatment_eps = [ep for ep in episodes if ep['is_treatment_format']]
    covered = {ep['episode'] for ep in treatment_eps}
    missing = [n for n in range(1, required_episodes + 1) if n not in covered]
    results['hard_gates']['coverage'] = {
        'found': required_episodes - len(missing),
        'required': required_episodes,
        'passed': not missing,
        'missing': missing
    }

    # 2. Rich content - all episodes have enriched descriptions (or legacy dramatic action)
    content_issues = []
    for ep in treatment_eps:
        if ep['has_rich_content']:
            # Enriched format: the Emotional beat field is the essential one.
            if not ep['has_emotional_beat']:
                content_issues.append({'episode': ep['episode'], 'issue': 'missing Emotional beat'})
        elif ep['dramatic_action']:
            # Legacy format: check word count
            wc = len(ep['dramatic_action'].split())
            if wc < 10:
                content_issues.append({'episode': ep['episode'], 'issue': f'dramatic action too short ({wc} words)'})
            elif wc > 25:
                content_issues.append({'episode': ep['episode'], 'issue': f'dramatic action too long ({wc} words)'})
        else:
            # No content at all
            content_issues.append({'episode': ep['episode'], 'issue': 'missing rich content or dramatic action'})

    results['hard_gates']['content'] = {
        'passed': not content_issues,
        'issues': content_issues,
        'rich_content_count': len([ep for ep in treatment_eps if ep['has_rich_content']]),
        'legacy_format_count': len([ep for ep in treatment_eps if ep['dramatic_action'] and not ep['has_rich_content']])
    }

    # 3. Hook ratio - 70-85% SILENT
    hooks = [ep['hook_type'] for ep in treatment_eps if ep['hook_type']]
    silent_count = hooks.count('SILENT')
    dialogue_count = hooks.count('DIALOGUE')

    if hooks:
        silent_pct = (silent_count / len(hooks)) * 100
        hook_passed = 70 <= silent_pct <= 85
    else:
        # Nothing to measure counts as a failure, not a pass.
        silent_pct = 0
        hook_passed = False

    results['hard_gates']['hook_ratio'] = {
        'silent_count': silent_count,
        'dialogue_count': dialogue_count,
        'silent_pct': round(silent_pct, 1),
        'target_range': '70-85%',
        'passed': hook_passed
    }

    # 4. Cliffhanger ratio - 70-85% MID-ACTION
    cliffs = [ep['cliffhanger_type'] for ep in treatment_eps if ep['cliffhanger_type']]
    midaction_count = cliffs.count('MID-ACTION')
    aftermath_count = cliffs.count('AFTERMATH')

    if cliffs:
        midaction_pct = (midaction_count / len(cliffs)) * 100
        cliff_passed = 70 <= midaction_pct <= 85
    else:
        midaction_pct = 0
        cliff_passed = False

    results['hard_gates']['cliffhanger_ratio'] = {
        'midaction_count': midaction_count,
        'aftermath_count': aftermath_count,
        'midaction_pct': round(midaction_pct, 1),
        'target_range': '70-85%',
        'passed': cliff_passed
    }

    # 5. Pattern variety - no 4+ consecutive same type (max 3 allowed)
    sorted_eps = sorted(treatment_eps, key=lambda x: x['episode'])

    def variety_violations(field):
        """One violation per over-long run; start/end are episode numbers."""
        pairs = [(ep['episode'], ep[field]) for ep in sorted_eps if ep[field]]
        return [
            {
                'type': run_type,
                'start': run_eps[0],
                'end': run_eps[-1],
                'count': len(run_eps)
            }
            for run_type, run_eps in _find_long_runs(pairs, max_allowed=3)
        ]

    hook_violations = variety_violations('hook_type')
    cliff_violations = variety_violations('cliffhanger_type')

    results['hard_gates']['pattern_variety'] = {
        'passed': not hook_violations and not cliff_violations,
        'hook_violations': hook_violations,
        'cliffhanger_violations': cliff_violations
    }

    # Calculate overall pass
    results['passed'] = all(gate['passed'] for gate in (
        results['hard_gates']['coverage'],
        results['hard_gates']['content'],
        results['hard_gates']['hook_ratio'],
        results['hard_gates']['cliffhanger_ratio'],
        results['hard_gates']['pattern_variety'],
    ))

    # === SOFT FLAGS ===

    # 1. Vague actions
    vague_eps = [ep for ep in treatment_eps if any('Vague phrase' in issue for issue in ep['issues'])]
    results['soft_flags']['vague_actions'] = {
        'count': len(vague_eps),
        'episodes': [{'episode': ep['episode'], 'action': ep['dramatic_action'], 'issues': ep['issues']} for ep in vague_eps]
    }

    # 2. Beat type patterns (4+ consecutive same type)
    beat_pairs = [(ep['episode'], ep['beat_type']) for ep in sorted_eps if ep['beat_type']]
    consecutive_beats = [
        {'type': run_type, 'episodes': run_eps, 'count': len(run_eps)}
        for run_type, run_eps in _find_long_runs(beat_pairs, max_allowed=3)
    ]

    results['soft_flags']['beat_patterns'] = {
        'count': len(consecutive_beats),
        'sequences': consecutive_beats
    }

    return results


def identify_weak_episodes(project_path):
    """Flag weak treatment episodes for plussing.

    Runs validate_treatment() and, unless it reported errors, attaches a
    ``weak_episodes`` list to its result. Each entry names one episode
    together with every reason it was flagged.

    Args:
        project_path: Project directory handed through to validate_treatment().

    Returns:
        The validate_treatment() result dict, with ``weak_episodes`` added
        when validation ran without errors.
    """
    results = validate_treatment(project_path)
    if results['errors']:
        return results

    # (episode field, reason reported when that field is empty)
    metadata_checks = (
        ('hook_type', "Missing hook type"),
        ('cliffhanger_type', "Missing cliffhanger type"),
        ('beat_type', "Missing beat type"),
    )

    flagged = []
    for episode in results['episodes']:
        action = episode['dramatic_action']
        rich = episode['has_rich_content']

        # Vague phrases found in the legacy dramatic action come first.
        why = [note for note in episode['issues'] if 'Vague phrase' in note]

        # Completeness of the treatment metadata.
        if not episode['is_treatment_format']:
            why.append("Not in treatment format")
        else:
            for field, message in metadata_checks:
                if not episode[field]:
                    why.append(message)
            if not rich and not action:
                why.append("Missing rich content or dramatic action")

        # Essential fields (enriched) or length limits (legacy).
        if rich:
            if not episode['has_emotional_beat']:
                why.append("Missing Emotional beat")
        elif action:
            word_total = len(action.split())
            if word_total < 10:
                why.append(f"Dramatic action too short: {word_total} words (min 10)")
            elif word_total > 25:
                why.append(f"Dramatic action too long: {word_total} words (max 25)")

        if not why:
            continue

        flagged.append({
            'episode': episode['episode'],
            'title': episode['title'],
            'has_rich_content': rich,
            'action': action,
            'reasons': why
        })

    results['weak_episodes'] = flagged
    return results


def print_treatment_report(results, project_name):
    """Print formatted treatment validation report.

    Args:
        results: Result dict in the shape produced by validate_treatment().
        project_name: Name shown in the banner and in the suggested
            ``/treatment`` commands.

    Returns:
        False when ``results['errors']`` is non-empty (only the errors are
        printed), otherwise ``results['passed']``.
    """
    def mark(ok):
        """Coloured check or cross for a gate outcome."""
        return f"{GREEN}✓{RESET}" if ok else f"{RED}✗{RESET}"

    print(f"\n{'═' * 65}")
    print(f"{BOLD}TREATMENT VALIDATION: {project_name.upper()}{RESET}")
    print(f"{'═' * 65}\n")

    if results['errors']:
        print(f"{RED}Errors:{RESET}")
        for err in results['errors']:
            print(f"  - {err}")
        return False

    print(f"{BOLD}HARD GATES{RESET}")
    print(f"{'─' * 65}\n")

    # Coverage
    cov = results['hard_gates']['coverage']
    print("COVERAGE")
    print(f"  Episodes with treatment: {cov['found']}/{cov['required']} {mark(cov['passed'])}")
    if cov['missing']:
        # Cap the list so a mostly-empty arc does not flood the terminal.
        print(f"  {RED}Missing: Ep {', '.join(map(str, cov['missing'][:10]))}{RESET}")
        if len(cov['missing']) > 10:
            print(f"  {RED}  ...and {len(cov['missing']) - 10} more{RESET}")

    # Content (rich descriptions or legacy dramatic action)
    ct = results['hard_gates']['content']
    print("\nCONTENT (enriched format)")
    print(f"  Rich content episodes: {ct['rich_content_count']}")
    if ct['legacy_format_count'] > 0:
        print(f"  Legacy format episodes: {ct['legacy_format_count']}")
    print(f"  All content valid: {mark(ct['passed'])}")
    if ct['issues']:
        for issue in ct['issues'][:5]:
            print(f"  {RED}Ep {issue['episode']}: {issue['issue']}{RESET}")
        if len(ct['issues']) > 5:
            print(f"  {RED}  ...and {len(ct['issues']) - 5} more{RESET}")

    # Hook ratio
    hr = results['hard_gates']['hook_ratio']
    print("\nHOOK DISTRIBUTION")
    print(f"  SILENT:   {hr['silent_count']} ({hr['silent_pct']}%)")
    print(f"  DIALOGUE: {hr['dialogue_count']}")
    print(f"  Target:   {hr['target_range']} SILENT {mark(hr['passed'])}")

    # Cliffhanger ratio
    cr = results['hard_gates']['cliffhanger_ratio']
    print("\nCLIFFHANGER DISTRIBUTION")
    print(f"  MID-ACTION: {cr['midaction_count']} ({cr['midaction_pct']}%)")
    print(f"  AFTERMATH:  {cr['aftermath_count']}")
    print(f"  Target:     {cr['target_range']} MID-ACTION {mark(cr['passed'])}")

    # Pattern variety
    pv = results['hard_gates']['pattern_variety']
    print("\nPATTERN VARIETY (max 3 consecutive)")
    print(f"  All patterns varied: {mark(pv['passed'])}")
    for v in pv['hook_violations']:
        print(f"  {RED}Hook: {v['count']}x {v['type']} (Ep {v['start']}-{v['end']}){RESET}")
    for v in pv['cliffhanger_violations']:
        print(f"  {RED}Cliff: {v['count']}x {v['type']} (Ep {v['start']}-{v['end']}){RESET}")

    # Soft flags
    print(f"\n{BOLD}SOFT FLAGS{RESET}")
    print(f"{'─' * 65}\n")

    # Vague actions
    va = results['soft_flags']['vague_actions']
    if va['count'] > 0:
        print(f"{YELLOW}VAGUE ACTIONS: {va['count']} episodes{RESET}")
        for ep in va['episodes'][:5]:
            action = ep['action'] or ''
            # Only show an ellipsis when the preview was actually cut short.
            suffix = '...' if len(action) > 50 else ''
            print(f"  Ep {ep['episode']}: \"{action[:50]}{suffix}\"")
            for issue in ep['issues']:
                print(f"    → {issue}")
        if va['count'] > 5:
            print(f"  ...and {va['count'] - 5} more")
    else:
        print(f"VAGUE ACTIONS: None detected {GREEN}✓{RESET}")

    # Beat patterns
    bp = results['soft_flags']['beat_patterns']
    if bp['count'] > 0:
        print(f"\n{YELLOW}CONSECUTIVE BEAT PATTERNS (4+): {bp['count']} sequences{RESET}")
        for seq in bp['sequences']:
            ep_range = f"Ep {seq['episodes'][0]}-{seq['episodes'][-1]}"
            print(f"  {seq['count']}x {seq['type']} ({ep_range})")
    else:
        print(f"\nCONSECUTIVE BEAT PATTERNS: None detected {GREEN}✓{RESET}")

    # Final result
    print(f"\n{'═' * 65}")
    if results['passed']:
        if va['count'] > 0 or bp['count'] > 0:
            print(f"{BOLD}{YELLOW}RESULT: PASS (with warnings){RESET}")
            # Only vague-action episodes can be targeted with --plus.
            weak_eps = [ep['episode'] for ep in va['episodes']]
            if weak_eps:
                print(f"\n{CYAN}To address warnings:{RESET}")
                print(f"  /treatment {project_name} --plus {','.join(map(str, weak_eps[:10]))}")
        else:
            print(f"{BOLD}{GREEN}RESULT: PASS{RESET}")
    else:
        print(f"{BOLD}{RED}RESULT: FAIL{RESET}")
        print(f"\n{YELLOW}FIX REQUIRED:{RESET}")
        if not results['hard_gates']['coverage']['passed']:
            print(f"  - Run /treatment {project_name} to add treatment metadata")
        if not results['hard_gates']['content']['passed']:
            print("  - Add rich content (Event, Cass, Hera, Emotional beat, Relationship)")
        if not results['hard_gates']['hook_ratio']['passed']:
            print("  - Adjust hook distribution to 70-85% SILENT")
        if not results['hard_gates']['cliffhanger_ratio']['passed']:
            print("  - Adjust cliffhanger distribution to 70-85% MID-ACTION")
        if not results['hard_gates']['pattern_variety']['passed']:
            print("  - Break up consecutive same-type patterns")
    print(f"{'═' * 65}\n")

    return results['passed']


def print_weak_episodes_report(results, project_name):
    """Print weak episodes report for plussing.

    Shows validation errors if there are any. Otherwise lists up to 15
    flagged episodes with their reasons and closes with a ``/treatment
    --plus`` recommendation covering the first 10 of them.

    Args:
        results: Result dict; ``errors`` is read directly and
            ``weak_episodes`` is treated as empty when absent.
        project_name: Name shown in the banner and the recommended command.
    """
    heavy_rule = '═' * 65
    shown_limit = 15

    print(f"\n{heavy_rule}")
    print(f"{BOLD}WEAK EPISODE SCAN: {project_name.upper()}{RESET}")
    print(f"{heavy_rule}\n")

    errors = results['errors']
    if errors:
        print(f"{RED}Errors:{RESET}")
        for message in errors:
            print(f"  - {message}")
        return

    flagged = results.get('weak_episodes', [])
    if not flagged:
        print(f"{GREEN}No weak episodes detected.{RESET}")
        print(f"{heavy_rule}\n")
        return

    print(f"{YELLOW}WEAK EPISODES: {len(flagged)} found{RESET}\n")

    for entry in flagged[:shown_limit]:
        print(f"Ep {entry['episode']}: \"{entry['title']}\"")
        action_text = entry['action']
        if action_text:
            # Preview is capped at 60 characters; mark it only when cut.
            tail = '...' if len(action_text) > 60 else ''
            print(f"  Action: \"{action_text[:60]}{tail}\"")
        for why in entry['reasons']:
            print(f"  {RED}→ {why}{RESET}")
        print()

    hidden = len(flagged) - shown_limit
    if hidden > 0:
        print(f"  ...and {hidden} more\n")

    # Generate recommendation
    targets = ','.join(str(entry['episode']) for entry in flagged[:10])
    print(f"{'─' * 65}")
    print(f"{CYAN}RECOMMENDATION:{RESET}")
    print(f"  /treatment {project_name} --plus {targets}")
    print(f"{heavy_rule}\n")


def generate_pairwise_prompts(project_path, episodes):
    """Generate pairwise comparison prompts for subjective cliffhanger scoring.

    Uses methodology from /evaluation/pairwise_comparison.md:
    - Reasoning before judgment
    - Specific criteria
    - Tournament bracket structure

    Args:
        project_path: Project directory; only its final path component is
            used, as the name in the banner.
        episodes: Episode dicts. Each needs an ``episode`` number; ``title``,
            ``cliffhanger_type`` and ``sequence`` are optional. When
            ``sequence`` is absent the sequence is derived from the episode
            number using the fixed sequence ranges below.

    Returns:
        The whole protocol as one newline-joined string.
    """
    project_name = Path(project_path).name

    # Episode ranges (inclusive) covered by each of the eight sequences.
    seq_ranges = {
        1: (1, 8), 2: (9, 15), 3: (16, 20), 4: (21, 28),
        5: (29, 30), 6: (31, 35), 7: (36, 45), 8: (46, 60)
    }

    def describe(ep):
        """Render '[CLIFFHANGER] "Title"', using '?' when the type is unset."""
        return f"[{ep.get('cliffhanger_type') or '?'}] \"{ep.get('title') or ''}\""

    def sequence_of(ep):
        """Explicit ``sequence`` value if present, else looked up by range."""
        explicit = ep.get('sequence')
        if explicit:
            return explicit
        for seq_num, (first, last) in seq_ranges.items():
            if first <= ep['episode'] <= last:
                return seq_num
        return None

    all_eps = sorted(episodes, key=lambda x: x['episode'])
    # Only the opening episodes are used for the sample matchups.
    sample_eps = all_eps[:10]

    output = []
    output.append("=" * 70)
    output.append(f"PAIRWISE COMPARISON PROTOCOL: {project_name.upper()}")
    output.append("=" * 70)
    output.append("")
    output.append("Run these comparisons with Claude to score subjective criteria.")
    output.append("Methodology: reasoning-before-judgment (see pairwise_comparison.md)")
    output.append("")

    # -------------------------------------------------------------------------
    # SECTION 1: CLIFFHANGER INTENSITY RANKING
    # -------------------------------------------------------------------------
    output.append("-" * 70)
    output.append("SECTION 1: CLIFFHANGER INTENSITY RANKING")
    output.append("-" * 70)
    output.append("")
    output.append("Compare cliffhangers pairwise to derive intensity scores.")
    output.append("Ask: 'Which cliffhanger creates more urgency to continue watching?'")
    output.append("")

    # Sample matchups from first 10 episodes (at most four adjacent pairs)
    if len(sample_eps) >= 2:
        output.append("SAMPLE MATCHUPS (run tournament for all 60):")
        output.append("")
        for i in range(0, min(len(sample_eps) - 1, 8), 2):
            ep_a = sample_eps[i]
            ep_b = sample_eps[i + 1]
            output.append(f"MATCH: Episode {ep_a['episode']} vs Episode {ep_b['episode']}")
            output.append(f"  A: {describe(ep_a)}")
            output.append(f"  B: {describe(ep_b)}")
            output.append("")

    output.append("PROMPT TEMPLATE:")
    output.append("```")
    output.append("PAIRWISE COMPARISON: Cliffhanger Intensity")
    output.append("")
    output.append("OBJECTIVE: Determine which cliffhanger creates more urgency")
    output.append("")
    output.append("EPISODE A: [title]")
    output.append("[Full cliffhanger description from treatment]")
    output.append("")
    output.append("EPISODE B: [title]")
    output.append("[Full cliffhanger description from treatment]")
    output.append("")
    output.append("TASK (complete in order):")
    output.append("1. ANALYZE A: What makes this cliffhanger urgent/intense?")
    output.append("2. ANALYZE B: What makes this cliffhanger urgent/intense?")
    output.append("3. TRADE-OFF: What does each gain/lose?")
    output.append("4. WINNER: Which creates more urgency?")
    output.append("5. CONFIDENCE: High/Medium/Low")
    output.append("```")
    output.append("")

    # -------------------------------------------------------------------------
    # SECTION 2: TIER 2 SCORED GATES
    # -------------------------------------------------------------------------
    output.append("-" * 70)
    output.append("SECTION 2: TIER 2 SCORED GATES")
    output.append("-" * 70)
    output.append("")
    output.append("Score each dimension 1-10 using calibration anchors.")
    output.append("Minimum average required: 7.0/10")
    output.append("")

    tier2_dimensions = [
        ("Tension Escalation",
         "Does tension build across sequences? Compare to exemplar arcs.",
         "Relentless escalation with strategic release valves"),
        ("Reversal Quality",
         "How surprising and earned are major reversals?",
         "Reversals shock yet feel inevitable in retrospect"),
        ("Stakes Escalation",
         "Do stakes genuinely increase from personal to existential?",
         "From personal to existential, each act raises the bet"),
        ("Variety Score",
         "How varied are episode types, locations, situations?",
         "No two episodes feel similar, constant freshness"),
        ("Pacing Balance",
         "Is there rhythm between intensity and breath?",
         "Perfect alternation - audience can recover but never relax"),
    ]

    for dim_name, question, exemplar in tier2_dimensions:
        output.append(f"### {dim_name}")
        output.append(f"Question: {question}")
        output.append(f"10/10 = {exemplar}")
        output.append("")
        output.append("CALIBRATION ANCHORS:")
        output.append("  10: Exemplary - could teach this dimension")
        output.append("   7: Strong - occasional issues but mostly effective")
        output.append("   5: Adequate - meets minimum but not compelling")
        output.append("   3: Weak - noticeable problems that hurt experience")
        output.append("   1: Failing - dimension actively hurts the arc")
        output.append("")
        output.append(f"SCORE FOR {dim_name.upper()}: ___/10")
        output.append("")

    # -------------------------------------------------------------------------
    # SECTION 3: SEQUENCE-LEVEL CLIFFHANGER SCORING
    # -------------------------------------------------------------------------
    output.append("-" * 70)
    output.append("SECTION 3: SEQUENCE-LEVEL CLIFFHANGER SCORING")
    output.append("-" * 70)
    output.append("")
    output.append("For each sequence, score average cliffhanger intensity.")
    output.append("Minimum required: 8.5/10 per sequence, 9.0/10 overall.")
    output.append("")

    # Group ALL episodes by sequence (not just the matchup sample), so every
    # sequence lists the episodes that belong to it.
    sequences = defaultdict(list)
    for ep in all_eps:
        seq = sequence_of(ep)
        if seq:
            sequences[seq].append(ep)

    for seq_num in range(1, 9):
        start, end = seq_ranges.get(seq_num, (0, 0))
        seq_eps = sequences.get(seq_num, [])
        output.append(f"SEQUENCE {seq_num} (Episodes {start}-{end}):")
        for ep in seq_eps[:5]:  # Show first 5
            output.append(f"  Ep {ep['episode']}: {describe(ep)}")
        if len(seq_eps) > 5:
            output.append(f"  ...and {len(seq_eps) - 5} more")
        output.append("  → SCORE THIS SEQUENCE: ___/10 (min 8.5)")
        output.append("")

    output.append("OVERALL AVERAGE: ___/10 (min 9.0)")
    output.append("")

    # -------------------------------------------------------------------------
    # SUMMARY
    # -------------------------------------------------------------------------
    output.append("=" * 70)
    output.append("SCORING SUMMARY")
    output.append("=" * 70)
    output.append("")
    output.append("TIER 2 DIMENSIONS:")
    output.append("  Tension Escalation:  ___/10")
    output.append("  Reversal Quality:    ___/10")
    output.append("  Stakes Escalation:   ___/10")
    output.append("  Variety Score:       ___/10")
    output.append("  Pacing Balance:      ___/10")
    output.append("  ─────────────────────────")
    output.append("  TIER 2 AVERAGE:      ___/10 (min 7.0)")
    output.append("")
    output.append("CLIFFHANGER INTENSITY:")
    for seq_num in range(1, 9):
        output.append(f"  SEQ {seq_num}: ___/10")
    output.append("  ─────────────────────────")
    output.append("  OVERALL:             ___/10 (min 9.0)")
    output.append("")
    output.append("RESULT:")
    output.append("  [ ] TIER 2 avg ≥7.0 AND all sequences ≥8.5 AND overall ≥9.0")
    output.append("  → READY FOR PROMOTION")
    output.append("=" * 70)

    return "\n".join(output)


def main():
    """Command-line entry point.

    Reads the project path from ``sys.argv[1]`` and picks a mode from the
    flags present anywhere on the command line, in this order of precedence:
    ``--pairwise``, ``--flag-weak``, ``--treatment``, then standard arc
    validation. Always terminates through ``sys.exit``:

    - 1 when no project path was given, when validation reported errors, or
      when the treatment/standard report did not pass;
    - 0 otherwise.
    """
    if len(sys.argv) < 2:
        print(f"Usage: python {sys.argv[0]} <project_path> [--treatment] [--flag-weak] [--pairwise]")
        print(f"Example: python {sys.argv[0]} ../projects/leviathan/development")
        print(f"         python {sys.argv[0]} ./olympus --treatment")
        print(f"         python {sys.argv[0]} ./olympus --flag-weak")
        print(f"         python {sys.argv[0]} ./olympus --pairwise    # Generate pairwise prompts")
        sys.exit(1)

    project_path = sys.argv[1]
    project_name = Path(project_path).name

    # Check for mode flags
    treatment_mode = '--treatment' in sys.argv
    flag_weak_mode = '--flag-weak' in sys.argv
    pairwise_mode = '--pairwise' in sys.argv

    def exit_if_errors(results):
        """Print results['errors'] and exit with status 1 when there are any."""
        if results['errors']:
            print(f"{RED}Errors:{RESET}")
            for err in results['errors']:
                print(f"  - {err}")
            sys.exit(1)

    if pairwise_mode:
        # Generate pairwise comparison prompts for subjective scoring
        results = validate_treatment(project_path)
        exit_if_errors(results)
        print(generate_pairwise_prompts(project_path, results['episodes']))
        sys.exit(0)

    if flag_weak_mode:
        results = identify_weak_episodes(project_path)
        # The report prints any errors itself; finding weak episodes is
        # advisory (exit 0), but a failed scan must not look like success.
        print_weak_episodes_report(results, project_name)
        sys.exit(1 if results['errors'] else 0)

    if treatment_mode:
        results = validate_treatment(project_path)
        exit_if_errors(results)
        passed = print_treatment_report(results, project_name)
        sys.exit(0 if passed else 1)

    # Standard arc validation
    results = validate_arc(project_path)
    exit_if_errors(results)

    passed = print_report(results, project_name)
    sys.exit(0 if passed else 1)


if __name__ == '__main__':
    main()