#!/usr/bin/env python3
"""
validate_visual_bible.py — Visual Bible Verification Gate

Exit gate for /visual-design phase. Validates that visual_bible.md is
complete, internally consistent, and ready to feed into storyboard generation.

Usage:
    python3 validate_visual_bible.py /leviathan/visual_bible.md /leviathan/
    python3 validate_visual_bible.py /leviathan/visual_bible.md /leviathan/ --json
    python3 validate_visual_bible.py /leviathan/visual_bible.md /leviathan/ --prompt

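Exit codes: 0 = clean, 1 = hard errors (FAIL), 2 = warnings (matches CONSTANTS.md)
Note: 2 is also returned when the visual bible is missing, unreadable or empty,
or when the project directory cannot be resolved (an error is printed to stderr).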
"""

import argparse
import json
import re
import sys
from pathlib import Path
from dataclasses import dataclass
from typing import List, Optional


# ── Data Structures ────────────────────────────────────────────────────────

@dataclass
class ValidationResult:
    """One finding emitted by a validation tier."""

    # Severity of the finding; the report and exit code are grouped by this value.
    tier: int  # 1=hard error, 2=warning, 3=info
    # Short label shown in brackets in the text report and as "category" in JSON.
    category: str
    # Human-readable description of the finding.
    message: str
    # Optional remediation text; only printed (as "FIX: ...") in prompt mode.
    fix_hint: Optional[str] = None


# ── Path Resolution ────────────────────────────────────────────────────────

def resolve_project_path(project_arg: str) -> Path:
    """
    Turn the CLI project argument into an existing directory.

    Lookup order (first existing directory wins):
      1. ``<root>/<name>`` where ``<name>`` is the argument with leading and
         trailing slashes/backslashes stripped, and ``<root>`` is two levels
         above this script when it lives in ``recoil/tools``, otherwise the
         current working directory.
      2. The argument taken verbatim as a path.
      3. ``<cwd>/<name>``.

    If none exists, an error is written to stderr and the process exits
    with status 2.
    """
    here = Path(__file__).resolve().parent
    inside_recoil_tools = here.name == "tools" and here.parent.name == "recoil"
    root = here.parent.parent if inside_recoil_tools else Path.cwd()

    relative_name = project_arg.strip("/").strip("\\")

    # Built lazily so each path is only constructed when the earlier ones missed.
    lookups = (
        lambda: root / relative_name,
        lambda: Path(project_arg),
        lambda: Path.cwd() / relative_name,
    )
    for build in lookups:
        location = build()
        if location.is_dir():
            return location

    print(f"ERROR: Project directory not found: {project_arg}", file=sys.stderr)
    sys.exit(2)


# ── Section Parsing ────────────────────────────────────────────────────────

# "## " headings that must be present; each missing one is a tier-1 error.
REQUIRED_SECTIONS = [
    "Color Palette",
    "Characters",
    "Lens Package",
    "Lighting Guides",
]

# "## " headings that are nice to have; each missing one is a tier-3 info note.
OPTIONAL_SECTIONS = [
    "Props",
    "Locations",
    "VFX Elements",
    "Production Notes",
]

# "### " subsections expected under "## Lens Package" (tier-1 when missing).
REQUIRED_LENSES = [
    "Primary Lens",
    "Close-Up Lens",
    "Wide Lens",
]

# '#' followed by exactly six hex digits ending at a word boundary; captures the digits.
HEX_PATTERN = re.compile(r'#([0-9A-Fa-f]{6})\b')
# Bracketed text that starts with an uppercase letter and otherwise contains only
# letters, whitespace or '/'; captures the text between the brackets.
PLACEHOLDER_PATTERN = re.compile(r'\[([A-Z][A-Za-z\s/]+)\]')
# '#' followed by two or more underscores, i.e. a colour slot left blank.
UNFILLED_HEX_PATTERN = re.compile(r'#_{2,}')

# Subsection headers under ## Characters that are NOT individual characters
# (compared against the "### " title after stripping surrounding whitespace).
NON_CHARACTER_HEADERS = {
    "Supporting Characters",
    "Visual Reference Only",
    "Supporting Characters (Visual Reference Only)",
}


def parse_sections(text: str) -> dict:
    """
    Split markdown text into a flat mapping of heading -> body text.

    Keys are ``"<h2 title>"`` for text directly under a ``## `` heading and
    ``"<h2 title>::<h3 title>"`` for text under a ``### `` heading. Values are
    the body lines joined with newlines (heading lines themselves excluded).
    Lines that appear before the first ``## `` heading are discarded, and
    deeper headings (``####`` and below) are treated as ordinary body text.
    """
    parsed = {}
    h2_title = None
    h3_title = None
    buffer = []

    # One pattern for both levels: group 1 is "##" or "###", group 2 the title.
    heading_re = re.compile(r'^(#{2,3}) (.+)$')

    def store():
        # Persist the body collected so far under the heading that owns it.
        if h2_title:
            label = f"{h2_title}::{h3_title}" if h3_title else h2_title
            parsed[label] = "\n".join(buffer)

    for raw in text.split("\n"):
        heading = heading_re.match(raw)
        if heading is None:
            buffer.append(raw)
            continue

        # A new heading closes whatever section was open.
        store()
        title = heading.group(2).strip()
        if len(heading.group(1)) == 2:
            h2_title, h3_title = title, None
        else:
            h3_title = title
        buffer = []

    # Flush the trailing section.
    store()
    return parsed


# ── Validators ─────────────────────────────────────────────────────────────

def validate_tier1(text: str, sections: dict, project_path: Path) -> List[ValidationResult]:
    """
    Tier 1: Hard Errors (exit code 1 — blocks storyboard generation)

    Checks, in order:
      1. every name in REQUIRED_SECTIONS is present as a section key prefix;
      2. at least one real character subsection exists under Characters;
      3. each character has a Visual Summary that is not a bracketed placeholder;
      4. every lens in REQUIRED_LENSES exists under Lens Package;
      5. a ``**Stock:**`` line exists and is not a bracketed placeholder;
      6. at least one subsection exists under Lighting Guides;
      7. unfilled ``#__`` colour slots are not the only colours in the file.

    Args:
        text: Raw contents of the visual bible.
        sections: Heading -> body mapping with ``"H2"`` / ``"H2::H3"`` keys
            (the shape produced by parse_sections).
        project_path: Unused by this tier; kept so all tiers share a signature.

    Returns:
        A list of tier-1 ValidationResult objects; empty when every check passes.
    """
    results = []

    # 1. Required sections exist
    for section_name in REQUIRED_SECTIONS:
        # Prefix match: accepts "Name", "Name::Sub" and titles that merely
        # start with the name.
        if not any(k.startswith(section_name) for k in sections):
            results.append(ValidationResult(
                tier=1,
                category="missing_section",
                message=f"Required section missing: '{section_name}'",
                fix_hint=f"Add '## {section_name}' section with content from visual_bible_template.md"
            ))

    # 2. At least one character defined
    # Skip non-character subsections (e.g., "Supporting Characters" as a subsection)
    char_subsections = [
        k for k in sections
        if k.startswith("Characters::")
        and k.split("::")[-1].strip() not in NON_CHARACTER_HEADERS
    ]
    if not char_subsections:
        results.append(ValidationResult(
            tier=1,
            category="no_characters",
            message="No characters defined in Characters section",
            fix_hint="Add ### [Character Name] subsections under ## Characters"
        ))

    # 3. Each character has a Visual Summary
    # `\s*` also spans a line break, so the summary text may sit on the label's
    # own line ("**Visual Summary:** text") or on the line(s) after it.
    summary_re = re.compile(r'\*\*Visual Summary:\*\*\s*(.+)')
    for key in char_subsections:
        char_name = key.split("::")[-1]
        content = sections[key]
        if "visual summary" not in content.lower():
            results.append(ValidationResult(
                tier=1,
                category="character_visual",
                message=f"Character '{char_name}' has no Visual Summary",
                fix_hint=f"Add **Visual Summary:** paragraph for {char_name} — this is the prompt seed for every shot"
            ))
        # Check Visual Summary is filled (not just the template placeholder)
        vis_match = summary_re.search(content)
        if vis_match:
            summary_text = vis_match.group(1).strip()
            if summary_text.startswith("[") and summary_text.endswith("]"):
                results.append(ValidationResult(
                    tier=1,
                    category="character_visual",
                    message=f"Character '{char_name}' Visual Summary is still a placeholder",
                    fix_hint=f"Replace placeholder with actual visual description for {char_name}"
                ))

    # 4. Lens Package has required lenses
    # Only checked when the section exists; its absence is already reported by check 1.
    if any(k.startswith("Lens Package") for k in sections):
        for lens_name in REQUIRED_LENSES:
            lens_found = any(k.startswith(f"Lens Package::{lens_name}") for k in sections)
            if not lens_found:
                results.append(ValidationResult(
                    tier=1,
                    category="lens_package",
                    message=f"Required lens missing: '{lens_name}'",
                    fix_hint=f"Add ### {lens_name} subsection under ## Lens Package"
                ))

    # 5. Film stock specified
    # Searched across the whole document, not only inside the Lens Package section.
    film_stock_match = re.search(r'\*\*Stock:\*\*\s*(.+)', text)
    if not film_stock_match:
        results.append(ValidationResult(
            tier=1,
            category="film_stock",
            message="No film stock specified in Lens Package",
            fix_hint="Add **Stock:** [name] under ### Film Stock Reference in Lens Package"
        ))
    else:
        stock_text = film_stock_match.group(1).strip()
        if stock_text.startswith("[") and stock_text.endswith("]"):
            results.append(ValidationResult(
                tier=1,
                category="film_stock",
                message="Film stock is still a placeholder",
                fix_hint="Replace [e.g. Kodak Vision3 500T] with actual stock choice"
            ))

    # 6. At least one lighting guide
    if not any(k.startswith("Lighting Guides::") for k in sections):
        results.append(ValidationResult(
            tier=1,
            category="lighting",
            message="No lighting guides defined",
            fix_hint="Add at least one ### [Scenario Name] under ## Lighting Guides with prompt language"
        ))

    # 7. Colour slots are not all placeholders
    # Fires only when the file has #____ placeholders and not a single real
    # six-digit HEX code; a file with no colour text at all is not flagged here.
    hex_colors = HEX_PATTERN.findall(text)
    unfilled_hex = UNFILLED_HEX_PATTERN.findall(text)
    if not hex_colors and unfilled_hex:
        results.append(ValidationResult(
            tier=1,
            category="color_palette",
            message=f"Color palette has {len(unfilled_hex)} unfilled HEX values and no real colors",
            fix_hint="Fill in #______ placeholders with actual HEX color codes"
        ))

    return results


def validate_tier2(text: str, sections: dict, project_path: Path) -> List[ValidationResult]:
    """
    Tier 2: Consistency Warnings (exit code 2 — should review before storyboard)

    Checks, in order:
      1. every character heading in ``<project>/bible/characters.md`` has a
         matching subsection under Characters (skipped if the file is absent);
      2. ``<project>/visual/breakdown.json`` lists locations while the visual
         bible has none (skipped if the file is absent or unusable);
      3. bracketed template placeholders remain in the text;
      4. unfilled ``#__`` colour slots remain in the text;
      5. a lens subsection's Focal Length still starts with a bracket;
      6. a character mentions "Wardrobe" but has no recognised table row.

    Args:
        text: Raw contents of the visual bible.
        sections: Heading -> body mapping with ``"H2"`` / ``"H2::H3"`` keys
            (the shape produced by parse_sections).
        project_path: Project directory used to locate the cross-reference files.

    Returns:
        A list of tier-2 ValidationResult objects; empty when nothing is flagged.
    """
    results = []

    # 1. Cross-check characters against characters.md
    chars_file = project_path / "bible" / "characters.md"
    if chars_file.exists():
        chars_text = chars_file.read_text(encoding="utf-8")
        # MULTILINE so that `^`/`$` work per line: an all-caps "## NAME" heading
        # counts whether it ends the line or is followed by an em dash, and
        # whether or not it is the first line of the file. Only horizontal
        # whitespace is allowed inside the name so a match never spans lines;
        # `\r` is tolerated before the line end for CRLF files.
        heading_re = re.compile(r'^## ([A-Z][A-Z \t]+?)[ \t\r]*(?:—|$)', re.MULTILINE)
        # All-caps structural headings of characters.md that are not characters.
        non_character_headings = {
            "PURPOSE", "VOICE CONSISTENCY RULES", "CHARACTER ENFORCEMENT RULES",
            "VALIDATION CHECKLIST",
        }
        bible_chars = {
            m.group(1).strip() for m in heading_re.finditer(chars_text)
        } - non_character_headings

        vb_chars = set()
        for key in sections:
            if key.startswith("Characters::"):
                char_name = key.split("::")[-1].strip()
                if char_name in NON_CHARACTER_HEADERS:
                    continue
                # Normalize: strip epithets after em-dash/en-dash (e.g., "Jinx — The Gambler" → "Jinx")
                char_name = re.split(r'\s*[—–-]\s+', char_name)[0].strip()
                vb_chars.add(char_name.upper())

        missing = bible_chars - vb_chars
        if missing:
            results.append(ValidationResult(
                tier=2,
                category="character_coverage",
                message=f"Characters in characters.md missing from visual bible: {', '.join(sorted(missing))}",
                fix_hint="Add visual design sections for all characters from characters.md"
            ))

    # 2. Cross-check locations against breakdown.json
    breakdown_file = project_path / "visual" / "breakdown.json"
    if breakdown_file.exists():
        try:
            breakdown = json.loads(breakdown_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Best-effort cross-check: an unreadable, undecodable or malformed
            # breakdown.json is skipped rather than failing the whole gate.
            # (JSONDecodeError and UnicodeDecodeError are both ValueError.)
            breakdown = None

        # Tolerate unexpected shapes instead of crashing on `.get` / `.keys`.
        locations = breakdown.get("locations") if isinstance(breakdown, dict) else None
        bd_location_count = len(locations) if isinstance(locations, (dict, list)) else 0
        has_vb_locations = any(key.startswith("Locations::") for key in sections)

        if bd_location_count and not has_vb_locations:
            results.append(ValidationResult(
                tier=2,
                category="location_coverage",
                message=f"breakdown.json has {bd_location_count} locations but visual bible has none",
                fix_hint="Add location sections from breakdown.json data"
            ))

    # 3. Unfilled placeholders (template brackets remaining)
    placeholders = PLACEHOLDER_PATTERN.findall(text)
    # Filter out known OK patterns (like "[LOCKED]" status)
    real_placeholders = [p for p in placeholders if p not in (
        "LOCKED", "Generated", "Not started", "In progress", "Ready",
        "Found", "POST", "SFX",
    ) and not p.startswith("path")]
    if real_placeholders:
        unique = sorted(set(real_placeholders))
        # The count covers every distinct placeholder; only the first ten are listed.
        results.append(ValidationResult(
            tier=2,
            category="placeholders",
            message=f"{len(unique)} template placeholders still present: {', '.join(unique[:10])}",
            fix_hint="Fill in remaining [bracketed] template text with actual content"
        ))

    # 4. Unfilled HEX values
    unfilled = UNFILLED_HEX_PATTERN.findall(text)
    if unfilled:
        results.append(ValidationResult(
            tier=2,
            category="unfilled_hex",
            message=f"{len(unfilled)} unfilled HEX color values (#______)",
            fix_hint="Replace #______ with actual HEX codes"
        ))

    # 5. Lens focal lengths must not be left as a bracketed placeholder
    for key in sections:
        if not key.startswith("Lens Package::"):
            continue
        focal_match = re.search(r'\*\*Focal Length:\*\*\s*(.+)', sections[key])
        if focal_match and focal_match.group(1).strip().startswith("["):
            results.append(ValidationResult(
                tier=2,
                category="lens_focal",
                message=f"{key.split('::')[-1]}: focal length is still a placeholder",
                fix_hint="Replace with actual focal length (e.g., 50mm)"
            ))

    # 6. Wardrobe table completeness per character
    for key in sections:
        if key.startswith("Characters::"):
            char_name = key.split("::")[-1]
            content = sections[key]
            if "Wardrobe" in content:
                # A table is recognised by a row starting "| Arc Phase" or "| Default".
                wardrobe_match = re.search(r'(?:\| Arc Phase|\| Default)', content)
                if not wardrobe_match:
                    results.append(ValidationResult(
                        tier=2,
                        category="wardrobe",
                        message=f"Character '{char_name}': wardrobe table has no entries",
                        fix_hint="Add at least a Default wardrobe row"
                    ))

    return results


def validate_tier3(text: str, sections: dict, project_path: Path) -> List[ValidationResult]:
    """
    Tier 3: Completeness Checks (informational)

    Reports, in order: optional sections that are absent, reference-image
    table cells still set to ``[path]`` or ``None``, a missing
    "Flux 2 Reference Slot Summary", and a palette with only one or two
    distinct HEX codes. ``project_path`` is not used by this tier.

    Returns a list of tier-3 ValidationResult objects (possibly empty).
    """
    notes = []

    def add_info(category, message, fix_hint):
        notes.append(ValidationResult(
            tier=3,
            category=category,
            message=message,
            fix_hint=fix_hint,
        ))

    # 1. Optional sections present
    # A prefix match also covers "Name::Sub" keys.
    for optional_name in OPTIONAL_SECTIONS:
        if any(heading.startswith(optional_name) for heading in sections):
            continue
        add_info(
            "optional_section",
            f"Optional section not present: '{optional_name}'",
            f"Consider adding '## {optional_name}' from template",
        )

    # 2. Reference image paths
    # A table row whose second cell is still "[path]" or "None".
    unfilled_slot = re.compile(r'\|\s*(\w[^|]*?)\s*\|\s*(\[path\]|None)\s*\|')
    open_slots = len(unfilled_slot.findall(text))
    if open_slots:
        add_info(
            "reference_images",
            f"{open_slots} reference image slots unfilled",
            "Generate reference images and update file paths",
        )

    # 3. Flux 2 slot assignment summary
    # The marker may appear in a heading or anywhere inside a section body.
    marker = "Flux 2 Reference Slot Summary"
    has_summary = any(
        marker in heading or marker in body
        for heading, body in sections.items()
    )
    if not has_summary:
        add_info(
            "slot_summary",
            "No Flux 2 Reference Slot Summary table found",
            "Add slot assignment summary in Production Notes",
        )

    # 4. HEX color variety check
    # Zero colours is deliberately not reported here; only 1 or 2 distinct codes.
    distinct_hex = len(set(HEX_PATTERN.findall(text)))
    if 0 < distinct_hex < 3:
        add_info(
            "color_variety",
            f"Only {distinct_hex} unique HEX colors defined — consider expanding palette",
            "Most projects need at least 5-8 distinct colors across palette + locations",
        )

    return notes


# ── Output Formatting ──────────────────────────────────────────────────────

def format_results(results: List[ValidationResult], mode: str = "report") -> str:
    """
    Render validation results as a string.

    ``mode == "json"`` returns an indented JSON document with counts, the
    derived exit code and per-tier category/message lists. Any other mode
    returns the plain-text report; ``mode == "prompt"`` additionally prints a
    ``FIX:`` line under each result that carries a non-empty ``fix_hint``.
    """
    by_tier = {1: [], 2: [], 3: []}
    for item in results:
        if item.tier in by_tier:
            by_tier[item.tier].append(item)
    errors, warnings, infos = by_tier[1], by_tier[2], by_tier[3]

    if mode == "json":
        def brief(items):
            return [{"category": r.category, "message": r.message} for r in items]

        payload = {
            "is_valid": not errors,
            "exit_code": 1 if errors else (2 if warnings else 0),
            "errors": len(errors),
            "warnings": len(warnings),
            "info": len(infos),
            "tier1_errors": brief(errors),
            "tier2_warnings": brief(warnings),
            "tier3_info": brief(infos),
        }
        return json.dumps(payload, indent=2)

    show_fixes = mode == "prompt"
    out = ["=" * 60, "VISUAL BIBLE VALIDATION REPORT", "=" * 60]

    # (items, heading when non-empty, padded tag, line when empty)
    layout = (
        (errors,
         f"\nTIER 1: HARD ERRORS ({len(errors)}) — BLOCKS STORYBOARD",
         "[ERROR] ",
         "\nTIER 1: No hard errors"),
        (warnings,
         f"\nTIER 2: CONSISTENCY WARNINGS ({len(warnings)}) — REVIEW RECOMMENDED",
         "[WARN]  ",
         "\nTIER 2: No warnings"),
        (infos,
         f"\nTIER 3: COMPLETENESS ({len(infos)}) — INFORMATIONAL",
         "[INFO]  ",
         "\nTIER 3: All complete"),
    )
    for items, heading, tag, empty_line in layout:
        if not items:
            out.append(empty_line)
            continue
        out.append(heading)
        out.append("-" * 50)
        for r in items:
            out.append(f"  {tag}[{r.category}] {r.message}")
            if show_fixes and r.fix_hint:
                out.append(f"          FIX: {r.fix_hint}")

    # Verdict footer mirrors the process exit code.
    out.append("")
    out.append("-" * 60)
    if errors:
        verdict = (f"RESULT: FAIL — {len(errors)} hard errors must be fixed before storyboarding",
                   "EXIT CODE: 1")
    elif warnings:
        verdict = (f"RESULT: WARNINGS — {len(warnings)} issues should be reviewed",
                   "EXIT CODE: 2")
    else:
        verdict = ("RESULT: CLEAN — Ready for storyboard generation",
                   "EXIT CODE: 0")
    out.extend(verdict)

    return "\n".join(out)


# ── Main ───────────────────────────────────────────────────────────────────

def main():
    """
    CLI entry point: load the visual bible, run all three tiers, print the
    report and exit with the gate status.

    Exit status: 1 if any tier-1 result exists, otherwise 2 if any tier-2
    result exists, otherwise 0. A missing, unreadable or empty visual bible
    prints an error to stderr and exits with 2.
    """
    cli = argparse.ArgumentParser(
        description="Validate visual_bible.md for completeness and consistency"
    )
    cli.add_argument("visual_bible", help="Path to visual_bible.md")
    cli.add_argument("project", help="Project path for cross-referencing")
    # --report is accepted for explicitness; it is the mode used when neither
    # --json nor --prompt is given.
    for flag, help_text in (
        ("--report", "Detailed report (default)"),
        ("--prompt", "Include fix instructions"),
        ("--json", "JSON output"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)

    opts = cli.parse_args()

    def abort(message):
        # NOTE(review): load failures share exit status 2 with "warnings only";
        # confirm callers can tell the two apart.
        print(message, file=sys.stderr)
        sys.exit(2)

    # Load visual bible
    bible_file = Path(opts.visual_bible)
    if not bible_file.exists():
        abort(f"ERROR: Visual bible not found: {bible_file}")

    try:
        text = bible_file.read_text(encoding="utf-8")
    except Exception as e:
        abort(f"ERROR: Could not read {bible_file}: {e}")

    if not text.strip():
        abort("ERROR: Visual bible is empty")

    # Parse first, then resolve the project (which may itself exit).
    sections = parse_sections(text)
    project_path = resolve_project_path(opts.project)

    # Run all tiers, keeping tier order in the combined list.
    findings = [
        finding
        for tier_check in (validate_tier1, validate_tier2, validate_tier3)
        for finding in tier_check(text, sections, project_path)
    ]

    # --json wins over --prompt; plain report otherwise.
    mode = "json" if opts.json else "prompt" if opts.prompt else "report"
    print(format_results(findings, mode))

    # Exit code follows the most severe tier present.
    tiers_seen = {finding.tier for finding in findings}
    if 1 in tiers_seen:
        sys.exit(1)
    if 2 in tiers_seen:
        sys.exit(2)
    sys.exit(0)


# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()