#!/usr/bin/python3
"""
Dramatic QC Gate - Post-Batch Quality Assessment

This script runs dramatic quality checks on generated episodes.
Unlike validate_behavioral_dna.py (hard gate), this is a SOFT GATE:
- Reports issues but doesn't block generation
- Provides /rewrite commands for fixing
- Can be integrated into checkpoint workflow

Checks:
1. Voice distinctiveness (swap test approximation)
2. Cover Test (LLM speaker identification; skipped without an API client)
3. Emotional register variety
4. Declaration timing (earned vs declared)
5. Theme statement detection
6. On-the-nose dialogue (regex pass, plus an LLM pass when a client exists)

Usage:
    python3 dramatic_qc_gate.py <project_path> --batch <N>
    python3 dramatic_qc_gate.py <project_path> --ep <N>
    python3 dramatic_qc_gate.py <project_path> --full

Arguments:
    project_path    Path to production project
    --batch N       Check batch N (episodes N*5-4 to N*5)
    --ep N          Check single episode N
    --full          Check all episodes

Returns:
    - Exit code 0: No MUST FIX issues found
    - Exit code 1: MUST FIX issues found (soft fail - reports but doesn't block)
    - Exit code 2: Configuration/path error
"""

import random
import sys
import re
import json
from pathlib import Path
from collections import defaultdict

# Import batch size, shared dialogue parsing, and LLM helpers from engine constants
try:
    sys.path.insert(0, str(Path(__file__).resolve().parent.parent / 'tools'))
    from engine_constants import (
        GENERATION_BATCH_SIZE,
        ANTHROPIC_SONNET,
        ANTHROPIC_HAIKU,
        get_anthropic_client,
        call_anthropic,
        parse_llm_field,
        extract_script_content,
        parse_dialogue_blocks as _shared_parse_dialogue_blocks,
    )
    _USE_SHARED = True
except ImportError:
    # Standalone fallbacks: no LLM client, so the LLM-backed checks are skipped.
    GENERATION_BATCH_SIZE = 5
    ANTHROPIC_SONNET = "claude-sonnet-4-6"
    ANTHROPIC_HAIKU = "claude-haiku-4-5-20251001"
    get_anthropic_client = lambda: None
    call_anthropic = lambda client, model, prompt, max_tokens=200: None
    parse_llm_field = lambda result, field, expected=None: None
    extract_script_content = lambda content: content
    _USE_SHARED = False

# Early relationship declarations that are red flags
# Thresholds from CONSTANTS.md → Relationship Earning Schedule
EARLY_DECLARATION_PHRASES = {
    'i love you': 51,               # Major declarations: episodes 51-60
    'i trust you completely': 21,   # Tentative trust: episodes 21-30
    'you\'re my family': 41,        # Deep connection: episodes 41-50
    'i need you': 31,               # Significant statements: episodes 31-40
    'you mean everything': 51,      # Major declarations: episodes 51-60
    'i see you completely': 41,     # Deep connection: episodes 41-50
    'you\'re all i have': 41,       # Deep connection: episodes 41-50
}

# Theme statement red flags (characters shouldn't state themes directly)
THEME_STATEMENT_PATTERNS = [
    r'you can never trust',
    r'trust is (?:earned|broken|everything)',
    r'that\'s what (?:love|trust|friendship) (?:really )?means',
    r'this is what (?:power|greed|love) does',
    r'we(?:\'re| are) all just',
    r'in the end,? (?:we|everyone|people)',
    r'the (?:truth|lesson|moral) is',
]

# Generic AI dialogue patterns
GENERIC_AI_PATTERNS = [
    r'i have calculated',
    r'processing (?:your|the) request',
    r'my analysis indicates',
    r'probability of success',
    r'baseline parameters',
    r'optimal solution',
    r'executing command',
    r'data insufficient',
]

# Generic action dialogue
GENERIC_ACTION_PATTERNS = [
    r'^we need to (?:move|go|run|leave)',
    r'^let\'s go\.?$',
    r'^come on\.?$',
    r'^watch out\.?$',
    r'^be careful\.?$',
    r'^stay here\.?$',
    r'^get down\.?$',
]

# Direct emotion statements used by the on-the-nose regex pass
EMOTION_STATEMENT_PATTERNS = [
    r'i(?:\'m| am) (?:so |really |truly )?(?:angry|scared|afraid|sad|happy|hurt|betrayed|lonely|broken)',
    r'(?:you|he|she|they) make[s]? me (?:feel |so )?(?:angry|scared|afraid|sad|happy|hurt)',
    r'i feel (?:so |really |truly )?(?:lost|alone|empty|broken|free|alive|dead inside)',
    r'this (?:is|feels) (?:just )?like (?:losing|finding|being)',
]

# Line prefixes that mark scene headings / forced headings / sections
_STRUCTURAL_PREFIXES = ('INT.', 'EXT.', '.', '#')

# ALL-CAPS tokens that are shot or time markers, never character cues
_NON_SPEAKER_CUES = frozenset([
    'ECU', 'CU', 'MCU', 'MS', 'WS', 'POV', 'SFX', 'VFX', 'INSERT',
    'CONTINUOUS', 'LATER', 'NIGHT', 'DAY', 'MORNING', 'EVENING',
])


def _matches_any(patterns, text):
    """Return True if any regex in *patterns* is found in *text*."""
    return any(re.search(pattern, text) for pattern in patterns)


# NOTE(review): this definition replaces the extract_script_content imported
# from engine_constants above (and the fallback lambda) — confirm intended.
def extract_script_content(content):
    """Extract the fountain script content from the episode file.

    Tries, in order: the body of a ```fountain fenced block, the text that
    follows a '## SCRIPT' heading up to the next '##' (or end of text),
    and finally the unmodified content.
    """
    fountain_match = re.search(r'```fountain\s*(.*?)```', content, re.DOTALL)
    if fountain_match:
        return fountain_match.group(1)

    script_match = re.search(r'## SCRIPT\s*(.*?)(?=##|$)', content, re.DOTALL)
    if script_match:
        return script_match.group(1)

    return content


def extract_dialogue_lines(script):
    """Extract character dialogue from script.

    Returns a list of {'speaker': str, 'line': str} dicts, one per non-empty
    dialogue line.

    Uses the shared parse_dialogue_blocks from engine_constants when it was
    importable. Otherwise a local fallback parser is used: an ALL-CAPS line
    shorter than 30 characters (that is not a scene heading or a shot/time
    marker) sets the speaker, and every following line belongs to that
    speaker until a blank line ends the block.
    """
    dialogue = []

    if _USE_SHARED:
        # Shared parser yields (speaker, text) blocks; split multi-line text
        # into individual lines for downstream consumers.
        for speaker, dialogue_text in _shared_parse_dialogue_blocks(script):
            for line in dialogue_text.split('\n'):
                line = line.strip()
                if line:
                    dialogue.append({'speaker': speaker, 'line': line})
        return dialogue

    speaker = None
    captured = False  # has the current speaker produced a line yet?

    for raw_line in script.split('\n'):
        text = raw_line.strip()

        if not text:
            # A blank line closes the block only after dialogue was captured,
            # so a stray blank between the cue and its dialogue is tolerated.
            if captured:
                speaker, captured = None, False
            continue

        # Scene headings, sections and shot/time markers end any open block.
        if text.startswith(_STRUCTURAL_PREFIXES) or text in _NON_SPEAKER_CUES:
            speaker, captured = None, False
            continue

        if text.isupper():
            # Short ALL-CAPS line is a character cue; longer ones are ignored.
            if len(text) < 30:
                speaker, captured = text, False
            continue

        if speaker is None:
            continue  # action/description outside a dialogue block

        # Parentheticals such as "(quietly)" are direction, not spoken words.
        if text.startswith('(') and text.endswith(')'):
            continue

        dialogue.append({'speaker': speaker, 'line': text})
        captured = True

    return dialogue


def check_voice_distinctiveness(episodes_data):
    """Check for generic dialogue patterns.

    Flags lines matching GENERIC_AI_PATTERNS as MUST FIX and lines matching
    GENERIC_ACTION_PATTERNS as COULD IMPROVE. A line can be flagged once per
    category. Returns a list of issue dicts.
    """
    issues = []

    for ep_num, content in episodes_data.items():
        dialogue = extract_dialogue_lines(extract_script_content(content))

        for item in dialogue:
            line_lower = item['line'].lower()

            if _matches_any(GENERIC_AI_PATTERNS, line_lower):
                issues.append({
                    'episode': ep_num,
                    'type': 'voice',
                    'severity': 'MUST FIX',
                    'speaker': item['speaker'],
                    'line': item['line'][:60],
                    'detail': 'Generic AI dialogue - could be any AI character',
                    'fix_cmd': f'/rewrite [project] ep {ep_num} "{item["speaker"]} dialogue generic—add distinctive voice"',
                })

            if _matches_any(GENERIC_ACTION_PATTERNS, line_lower):
                issues.append({
                    'episode': ep_num,
                    'type': 'voice',
                    'severity': 'COULD IMPROVE',
                    'speaker': item['speaker'],
                    'line': item['line'][:60],
                    'detail': 'Generic action line - could be any character',
                    'fix_cmd': f'/rewrite [project] ep {ep_num} "Generic dialogue—rewrite with character idiom"',
                })

    return issues


def check_emotional_register(episodes_data):
    """Check for monotone emotional register across batch.

    Each episode gets a rough 1-10 intensity derived from how many distinct
    high-intensity and low-intensity marker words appear anywhere in its
    script (substring presence, not occurrence counts). When at least three
    episodes are present and their intensities span 2 points or fewer, a
    single batch-level COULD IMPROVE issue is returned.
    """
    issues = []

    high_markers = ['fires', 'shoots', 'explosion', 'attack', 'chase',
                    'running', 'fight', 'scream', 'blood', 'death']
    low_markers = ['quietly', 'softly', 'silence', 'pause', 'beat',
                   'gentle', 'whisper', 'moment', 'peace']

    intensities = {}
    for ep_num, content in episodes_data.items():
        script_lower = extract_script_content(content).lower()

        high_count = sum(1 for m in high_markers if m in script_lower)
        low_count = sum(1 for m in low_markers if m in script_lower)

        # Estimate intensity 1-10
        if high_count > low_count + 2:
            intensity = min(10, 6 + high_count)
        elif low_count > high_count + 2:
            intensity = max(1, 5 - low_count)
        else:
            intensity = 5 + (high_count - low_count)

        intensities[ep_num] = min(10, max(1, intensity))

    # Check for monotone (all within 2 points of each other)
    if len(intensities) >= 3:
        values = list(intensities.values())
        range_val = max(values) - min(values)

        if range_val <= 2:
            issues.append({
                'episode': 'batch',
                'type': 'texture',
                'severity': 'COULD IMPROVE',
                'detail': f'Monotone emotional register across batch (range: {range_val})',
                'intensities': intensities,
                'suggestion': 'Vary one episode significantly up or down',
            })

    return issues


def check_earned_declarations(episodes_data):
    """Check for unearned emotional declarations.

    A dialogue line containing a phrase from EARLY_DECLARATION_PHRASES is
    flagged MUST FIX when the episode number is below that phrase's minimum
    episode. Episode keys must be integers. Returns a list of issue dicts.
    """
    issues = []

    for ep_num, content in episodes_data.items():
        dialogue = extract_dialogue_lines(extract_script_content(content))

        for item in dialogue:
            line_lower = item['line'].lower()

            for phrase, min_ep in EARLY_DECLARATION_PHRASES.items():
                if phrase in line_lower and ep_num < min_ep:
                    issues.append({
                        'episode': ep_num,
                        'type': 'relationship_earning',
                        'severity': 'MUST FIX',
                        'speaker': item['speaker'],
                        'line': item['line'][:60],
                        'detail': f'Declaration too early (found in Ep {ep_num}, appropriate after Ep {min_ep})',
                        'fix_cmd': f'/rewrite [project] ep {ep_num} "Unearned declaration—soften or move later"',
                    })

    return issues


def check_theme_statements(episodes_data):
    """Check for characters directly stating themes.

    Flags (at most once per line) dialogue matching THEME_STATEMENT_PATTERNS
    as MUST FIX. Returns a list of issue dicts.
    """
    issues = []

    for ep_num, content in episodes_data.items():
        dialogue = extract_dialogue_lines(extract_script_content(content))

        for item in dialogue:
            if _matches_any(THEME_STATEMENT_PATTERNS, item['line'].lower()):
                issues.append({
                    'episode': ep_num,
                    'type': 'texture',
                    'severity': 'MUST FIX',
                    'speaker': item['speaker'],
                    'line': item['line'][:60],
                    'detail': 'Theme stated directly in dialogue—should be embodied, not announced',
                    'fix_cmd': f'/rewrite [project] ep {ep_num} "Theme stated in dialogue—embody instead"',
                })

    return issues


# ---------------------------------------------------------------------------
# LLM-based Cover Test (Scaffolding Gate G3 — Seger)
# Strips dialogue tags, asks Sonnet to identify speakers.
# If accuracy < 80%, dialogue voices have converged.
# ---------------------------------------------------------------------------

def _load_character_names(project_path):
    """Load character names from characters.md for the Cover Test.

    Looks for bible/characters.md, then characters.md, under project_path.
    Every level-2 markdown heading ('## Name') whose text starts with a
    capital letter is treated as a character name, except a few known
    section titles. Names are returned upper-cased; an empty list is
    returned when neither file exists.
    """
    chars_path = project_path / "bible" / "characters.md"
    if not chars_path.exists():
        chars_path = project_path / "characters.md"
    if not chars_path.exists():
        return []

    content = chars_path.read_text(encoding="utf-8")
    names = []
    # Only horizontal whitespace is allowed inside the heading: \s would also
    # match newlines and pull the following lines into the captured name.
    for match in re.finditer(r'^##[ \t]+([A-Z][A-Za-z0-9 \t\-]+)', content, re.MULTILINE):
        name = match.group(1).strip()
        if name.lower() not in ('voice', 'speech', 'dialogue', 'overview', 'behavioral'):
            names.append(name.upper())
    return names


def check_cover_test(episodes_data, project_path):
    """
    Cover Test (G3 — Seger): Strip dialogue tags, ask LLM to identify speakers.

    If Sonnet can't identify ≥80% of speakers correctly, the voices have
    converged — flag for Script Doctor (MUST FIX). Accuracy between 80% and
    90% yields a COULD IMPROVE issue.

    Returns list of issues. Returns an empty list when no API client is
    available, no character names are found, fewer than 5 attributable
    dialogue lines exist, or the LLM call fails (a warning is printed to
    stderr in the last case).
    """
    issues = []
    client = get_anthropic_client()
    if client is None:
        return issues  # Skip silently without API

    character_names = _load_character_names(project_path)
    if not character_names:
        return issues

    # Collect all dialogue across the batch spoken by a known character
    all_dialogue = []
    for ep_num, content in episodes_data.items():
        dialogue = extract_dialogue_lines(extract_script_content(content))
        for item in dialogue:
            if item['speaker'] in character_names:
                all_dialogue.append({
                    'ep': ep_num,
                    'speaker': item['speaker'],
                    'line': item['line'],
                })

    if len(all_dialogue) < 5:
        return issues  # Not enough dialogue to test

    # Sample up to 20 lines for the test (cost control)
    sample = random.sample(all_dialogue, min(20, len(all_dialogue)))

    # Build the stripped-tags prompt
    numbered_lines = []
    answer_key = {}
    for i, item in enumerate(sample, 1):
        numbered_lines.append(f"{i}. \"{item['line']}\"")
        answer_key[i] = item['speaker']

    chars_list = ', '.join(sorted(set(character_names)))

    prompt = f"""You are taking a dialogue identification test. Below are lines of dialogue from a microdrama series with the speaker tags REMOVED. Based only on word choice, rhythm, and perspective, identify which character said each line.

Characters in this series: {chars_list}

Dialogue lines (speaker unknown):
{chr(10).join(numbered_lines)}

For each numbered line, output the character name you think said it.
Output EXACTLY in this format (one per line):
1: [CHARACTER NAME]
2: [CHARACTER NAME]
..."""

    try:
        resp = client.messages.create(
            model=ANTHROPIC_SONNET,
            max_tokens=500,
            messages=[{"role": "user", "content": prompt}],
        )
        result = resp.content[0].text.strip()
    except Exception as exc:
        # Best-effort check: report why it was skipped instead of hiding it.
        print(f"Warning: Cover Test skipped ({exc})", file=sys.stderr)
        return issues

    # Score the answers; each line number counts once even if repeated.
    correct = 0
    total = 0
    misidentified = []
    seen = set()
    for line in result.split('\n'):
        match = re.match(r'(\d+)\s*:\s*(.+)', line.strip())
        if not match:
            continue
        line_num = int(match.group(1))
        if line_num not in answer_key or line_num in seen:
            continue
        seen.add(line_num)
        guessed = match.group(2).strip().upper()
        actual = answer_key[line_num]
        total += 1
        # Substring match tolerates brackets or partial names in the answer.
        if guessed == actual or guessed in actual or actual in guessed:
            correct += 1
        else:
            misidentified.append({
                'line_num': line_num,
                'actual': actual,
                'guessed': guessed,
                'text': sample[line_num - 1]['line'][:50],
                'ep': sample[line_num - 1]['ep'],
            })

    if total == 0:
        print("Warning: Cover Test response could not be parsed", file=sys.stderr)

    accuracy = (correct / total * 100) if total > 0 else 100

    if accuracy < 80:
        mistakes = ", ".join(
            m["actual"] + " mistaken for " + m["guessed"] for m in misidentified[:3]
        )
        issues.append({
            'episode': 'batch',
            'type': 'voice',
            'severity': 'MUST FIX',
            'detail': f'COVER TEST FAILED: {accuracy:.0f}% speaker accuracy ({correct}/{total}). Voices have converged.',
            'suggestion': f'Misidentified: {mistakes}',
            'fix_cmd': '/script-doctor [project] --focus voice',
            'cover_test_accuracy': accuracy,
        })
    elif accuracy < 90:
        issues.append({
            'episode': 'batch',
            'type': 'voice',
            'severity': 'COULD IMPROVE',
            'detail': f'Cover Test: {accuracy:.0f}% speaker accuracy ({correct}/{total}). Some voice convergence.',
            'cover_test_accuracy': accuracy,
        })

    return issues


def check_on_the_nose(episodes_data):
    """
    On-the-Nose Diagnostic (R4 — Seger): Detect characters directly stating
    emotions or themes.

    First pass: regex (EMOTION_STATEMENT_PATTERNS) over every dialogue line;
    matches are MUST FIX. Second pass: when an API client is available and
    the regex pass found fewer than 5 issues, Haiku reviews up to 40 lines
    of dialogue; lines it flags that can be located in an episode script
    are added as COULD IMPROVE. An LLM failure prints a warning to stderr
    and leaves the regex results intact.

    Returns list of issues.
    """
    issues = []
    client = get_anthropic_client()

    # Parse every episode once; both passes reuse the result.
    scripts = {
        ep_num: extract_script_content(content)
        for ep_num, content in episodes_data.items()
    }
    dialogues = {
        ep_num: extract_dialogue_lines(script)
        for ep_num, script in scripts.items()
    }

    # First pass: regex emotion statements
    for ep_num, dialogue in dialogues.items():
        for item in dialogue:
            if _matches_any(EMOTION_STATEMENT_PATTERNS, item['line'].lower()):
                issues.append({
                    'episode': ep_num,
                    'type': 'on_the_nose',
                    'severity': 'MUST FIX',
                    'speaker': item['speaker'],
                    'line': item['line'][:60],
                    'detail': 'Character directly states emotional state — rewrite into action or subtext',
                    'fix_cmd': f'/rewrite [project] ep {ep_num} "On-the-nose dialogue—translate to action/subtext"',
                })

    # Second pass: LLM for deeper on-the-nose detection (batch level).
    # Only run it if regex didn't already catch many issues.
    if not client or len(issues) >= 5:
        return issues

    all_dialogue_text = [
        f"Ep{ep_num} {item['speaker']}: {item['line']}"
        for ep_num, dialogue in dialogues.items()
        for item in dialogue
    ]
    if len(all_dialogue_text) < 3:
        return issues

    dialogue_block = '\n'.join(all_dialogue_text[:40])  # Cap at 40 lines

    prompt = f"""Review this dialogue for on-the-nose writing. Flag lines where characters directly STATE their emotions, announce the theme, or explain what they're feeling instead of showing it through action or subtext.

{dialogue_block}

List ONLY the on-the-nose lines (max 5). For each, output:
LINE: [the exact dialogue]
PROBLEM: [why it's on-the-nose, in 5 words]

If no on-the-nose lines found, output: NONE"""

    try:
        resp = client.messages.create(
            model=ANTHROPIC_HAIKU,
            max_tokens=400,
            messages=[{"role": "user", "content": prompt}],
        )
        result = resp.content[0].text.strip()
    except Exception as exc:
        print(f"Warning: on-the-nose LLM pass skipped ({exc})", file=sys.stderr)
        return issues

    # Only a response that *starts* with NONE means "nothing found"; the word
    # may legitimately appear inside a flagged line of dialogue.
    if result.upper().startswith("NONE"):
        return issues

    scripts_lower = {ep_num: script.lower() for ep_num, script in scripts.items()}

    for match in re.finditer(r'LINE:\s*(.+?)(?:\n|$)', result):
        flagged_line = match.group(1).strip().strip('"\'')
        # The model may echo the "EpN SPEAKER: " prefix it was shown.
        flagged_line = re.sub(r'^Ep\d+\s+[^:]{1,40}:\s*', '', flagged_line).strip('"\'')
        needle = flagged_line[:30].lower()
        if not needle:
            continue  # an empty needle would match every script

        # Find which episode this belongs to
        for ep_num, script_lower in scripts_lower.items():
            if needle in script_lower:
                issues.append({
                    'episode': ep_num,
                    'type': 'on_the_nose',
                    'severity': 'COULD IMPROVE',
                    'line': flagged_line[:60],
                    'detail': 'LLM detected on-the-nose dialogue — consider rewriting into subtext',
                    'fix_cmd': f'/rewrite [project] ep {ep_num} "On-the-nose—translate to subtext"',
                })
                break

    return issues


def load_episodes(project_path, batch_num=None, ep_num=None, full=False):
    """Load episode content based on mode.

    Reads episodes/ep_NNN.md files (NNN zero-padded to three digits) under
    project_path. Mode precedence is batch_num, then ep_num, then full; with
    no mode selected, or no episodes directory, an empty dict is returned.

    Returns a dict mapping episode number (int) to file text. Missing
    episode files are skipped.
    """
    episodes_dir = project_path / "episodes"
    episodes_data = {}

    if not episodes_dir.exists():
        return episodes_data

    if batch_num:
        ep_start = (batch_num - 1) * GENERATION_BATCH_SIZE + 1
        ep_end = batch_num * GENERATION_BATCH_SIZE
        ep_range = range(ep_start, ep_end + 1)
    elif ep_num:
        ep_range = [ep_num]
    elif full:
        # Find all episodes
        ep_range = []
        for ep_path in sorted(episodes_dir.glob("ep_*.md")):
            match = re.search(r'ep_(\d+)', ep_path.name)
            if match:
                ep_range.append(int(match.group(1)))
    else:
        return episodes_data

    for ep in ep_range:
        ep_file = episodes_dir / f"ep_{ep:03d}.md"
        if ep_file.exists():
            # Scripts contain non-ASCII punctuation; don't rely on the
            # platform default encoding.
            episodes_data[ep] = ep_file.read_text(encoding="utf-8")

    return episodes_data


def _parse_mode_args(args):
    """Parse the CLI arguments that follow the project path.

    Returns (batch_num, ep_num, full). Unknown arguments, and --batch/--ep
    with no following value, are ignored. Raises ValueError when the value
    given to --batch or --ep is not an integer.
    """
    batch_num = None
    ep_num = None
    full = False

    i = 0
    while i < len(args):
        flag = args[i]
        if flag in ('--batch', '--ep') and i + 1 < len(args):
            try:
                value = int(args[i + 1])
            except ValueError:
                raise ValueError(
                    f"{flag} expects an integer, got {args[i + 1]!r}"
                ) from None
            if flag == '--batch':
                batch_num = value
            else:
                ep_num = value
            i += 2
        elif flag == '--full':
            full = True
            i += 1
        else:
            i += 1

    return batch_num, ep_num, full


def _print_issue_section(title, issues, detail_prefix):
    """Print one titled report section listing every issue in *issues*."""
    rule = '-' * 60
    print(f"\n{rule}")
    print(title)
    print(rule)

    for issue in issues:
        print(f"\n[{issue['type'].upper()}] Ep {issue['episode']}: {issue.get('speaker', '')}")
        if 'line' in issue:
            print(f" Line: \"{issue['line']}\"")
        print(f" {detail_prefix}{issue['detail']}")
        if 'suggestion' in issue:
            print(f" Suggestion: {issue['suggestion']}")
        if 'fix_cmd' in issue:
            print(f" {issue['fix_cmd']}")


def main():
    """CLI entry point: load episodes, run all checks, print the report.

    Exits with 0 when no MUST FIX issues were found, 1 when at least one was
    found, and 2 on a usage, path or argument error or when no episodes
    could be loaded.
    """
    if len(sys.argv) < 3:
        print("Usage: python3 dramatic_qc_gate.py <project_path> --batch <N>")
        print("       python3 dramatic_qc_gate.py <project_path> --ep <N>")
        print("       python3 dramatic_qc_gate.py <project_path> --full")
        sys.exit(2)

    project_path = Path(sys.argv[1]).resolve()

    if not project_path.exists():
        print(f"Error: Project path does not exist: {project_path}")
        sys.exit(2)

    # Parse mode; a malformed number is a configuration error (exit code 2).
    try:
        batch_num, ep_num, full = _parse_mode_args(sys.argv[2:])
    except ValueError as exc:
        print(f"Error: {exc}")
        sys.exit(2)

    # Load episodes
    episodes_data = load_episodes(project_path, batch_num, ep_num, full)

    if not episodes_data:
        print("Error: No episodes found")
        sys.exit(2)

    # Run checks
    all_issues = []
    all_issues.extend(check_voice_distinctiveness(episodes_data))
    all_issues.extend(check_cover_test(episodes_data, project_path))
    all_issues.extend(check_emotional_register(episodes_data))
    all_issues.extend(check_earned_declarations(episodes_data))
    all_issues.extend(check_theme_statements(episodes_data))
    all_issues.extend(check_on_the_nose(episodes_data))

    # Categorize
    must_fix = [issue for issue in all_issues if issue['severity'] == 'MUST FIX']
    could_improve = [issue for issue in all_issues if issue['severity'] == 'COULD IMPROVE']

    # Report
    if batch_num:
        scope = f"Batch {batch_num} (Ep {(batch_num-1)*GENERATION_BATCH_SIZE+1}-{batch_num*GENERATION_BATCH_SIZE})"
    elif ep_num:
        scope = f"Episode {ep_num}"
    else:
        scope = f"Full Series ({len(episodes_data)} episodes)"

    print(f"\n{'='*60}")
    print(f"DRAMATIC QC: {scope}")
    print(f"Project: {project_path.name}")
    print(f"{'='*60}")

    print(f"\nMUST FIX: {len(must_fix)} issues")
    print(f"COULD IMPROVE: {len(could_improve)} issues")

    if must_fix:
        _print_issue_section("MUST FIX", must_fix, "Problem: ")

    if could_improve:
        _print_issue_section("COULD IMPROVE", could_improve, "")

    # Result
    print(f"\n{'='*60}")
    if must_fix:
        print(f"DRAMATIC QC: {len(must_fix)} MUST FIX issues found")
        print("\nRun the /rewrite commands above to address issues.")
        print(f"{'='*60}\n")
        sys.exit(1)  # Soft fail - reports but doesn't block
    else:
        print("DRAMATIC QC: PASSED (no critical issues)")
        if could_improve:
            print(f"Consider addressing the {len(could_improve)} COULD IMPROVE items.")
        print(f"{'='*60}\n")
        sys.exit(0)


if __name__ == "__main__":
    main()