#!/usr/bin/env python3
"""
derive_breakdown.py — Derive breakdown.json from global_bible.json.

Pure Python schema translation. No LLM calls. Deterministic mapping from
Starsend's GlobalBible format to Recoil's breakdown.json format.

Usage:
    python3 tools/derive_breakdown.py \
        path/to/global_bible.json \
        path/to/project/ \
        [--output path/to/breakdown.json] \
        [--merge path/to/existing_breakdown.json]

The --merge flag preserves locked assets, reference images, prompts, and
other human-curated data from an existing breakdown.json while updating
structural data from the global bible.
"""

import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path


def load_json(path: Path) -> dict:
    """Read and parse a JSON file, exiting the process if it cannot be decoded.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        SystemExit: With status 1 when the file is not valid UTF-8 or not
            valid JSON; an ``ERROR:`` line is written to stderr first.
        OSError: If the file cannot be opened or read.
    """
    try:
        # Pin the encoding: the default for open() is locale-dependent and
        # is not UTF-8 on every platform, which would corrupt or reject
        # non-ASCII text in the document.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"ERROR: Invalid JSON in {path}: {e}", file=sys.stderr)
        sys.exit(1)


def save_json(path: Path, data: dict) -> None:
    """Write *data* to *path* as 2-space-indented JSON.

    Missing parent directories are created. A ``Saved: <path>`` line is
    written to stderr on success.

    Args:
        path: Destination file; overwritten if it already exists.
        data: JSON-serializable document to write.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot encode. The
            destination file is not touched in that case.
        OSError: If the directory or file cannot be created or written.
    """
    # Serialize before opening the file: opening with "w" truncates
    # immediately, so encoding straight into the handle would destroy an
    # existing file whenever serialization fails part-way through.
    payload = json.dumps(data, indent=2)
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
    print(f"Saved: {path}", file=sys.stderr)


def derive_character(char_id: str, bible_char: dict) -> dict:
    """Translate a GlobalBible character to breakdown.json format.

    Args:
        char_id: Character identifier; used as the display name when the
            bible entry has no ``display_name``.
        bible_char: The character's entry from the global bible. Reads
            ``episodes``, ``phases``, ``display_name``,
            ``visual_description``, ``height_cm`` and
            ``scale_prompt_fragment``; all are optional.

    Returns:
        A new breakdown character record. Reference images and prompts are
        initialised to ``None`` and ``reference_status`` to
        ``"not_started"``.

    Raises:
        KeyError: If a phase lacks ``phase_id``, ``start_ep`` or ``end_ep``.
    """
    # ``or []`` also covers an explicit JSON null; the copy keeps the
    # returned record from aliasing the bible's own list.
    episodes = list(bible_char.get("episodes") or [])
    phases = bible_char.get("phases") or []

    # One pass over the phases feeds all three phase-derived structures.
    wardrobe = {}
    hair_makeup = {}
    state_changes = []
    for phase in phases:
        phase_id = phase["phase_id"]
        start_ep = phase["start_ep"]
        end_ep = phase["end_ep"]
        trigger = phase.get("phase_trigger_event", "")

        wardrobe[phase_id] = {
            "episodes": [start_ep, end_ep],
            "description": phase.get("wardrobe_description", ""),
            "reference_images": {
                "front": None, "profile": None, "three_quarter": None,
                "close_up": None, "full_body": None, "back": None,
            },
        }

        # A hair/makeup entry is only emitted when the phase says something
        # about hair or distinguishing marks.
        hm = phase.get("hair_makeup", "")
        marks = phase.get("distinguishing_marks", "")
        if hm or marks:
            hair_makeup[phase_id] = {
                "episodes": [start_ep, end_ep],
                "hair": hm,
                "skin": marks,
                "notes": trigger,
            }

        # Each phase trigger becomes a state change at the phase's first
        # episode.
        if trigger:
            state_changes.append({"episode": start_ep, "change": trigger})

    # Physical scale: the prompt fragment doubles as the "build" text.
    physical_scale = {}
    if bible_char.get("height_cm"):
        physical_scale["height_cm"] = bible_char["height_cm"]
    if bible_char.get("scale_prompt_fragment"):
        physical_scale["scale_prompt_fragment"] = bible_char["scale_prompt_fragment"]
        physical_scale["build"] = bible_char["scale_prompt_fragment"]

    return {
        "display_name": bible_char.get("display_name", char_id),
        "episodes": episodes,
        "episode_count": len(episodes),
        # min() rather than episodes[0]: the list is not guaranteed sorted.
        # 1 is the placeholder when no episodes are listed.
        "first_appearance": min(episodes) if episodes else 1,
        "dialogue_count": 0,  # Not tracked in bible; regex pass fills this
        "visual_description": bible_char.get("visual_description", ""),
        "wardrobe": wardrobe,
        "hair_makeup": hair_makeup,
        "state_changes": state_changes,
        "signature_props": [],
        "physical_scale": physical_scale,
        "reference_status": "not_started",
        "reference_images": {
            "front": None, "profile": None, "three_quarter": None,
            "close_up": None, "full_body": None, "back": None, "hero": None,
        },
        "prompts": {"reference": None, "flux2": None},
    }


def derive_location(loc_id: str, bible_loc: dict) -> dict:
    """Build the breakdown.json record for one GlobalBible location.

    The INT/EXT type is inferred purely from the ``loc_id`` prefix:
    ``ext_`` gives ``"EXT"``, ``int_ext_`` gives ``"INT/EXT"``, anything
    else is ``"INT"``. Episode coverage is left empty here.
    """
    if loc_id.startswith("ext_"):
        setting = "EXT"
    elif loc_id.startswith("int_ext_"):
        setting = "INT/EXT"
    else:
        setting = "INT"

    # A missing or empty lighting profile maps to None; otherwise every
    # expected field is filled in, falling back to a default per field.
    raw_lighting = bible_loc.get("lighting_profile")
    if raw_lighting:
        field_defaults = (
            ("primary_source", ""),
            ("direction", "ABOVE"),
            ("quality", "hard"),
            ("color_temp", "neutral"),
        )
        lighting = {
            field: raw_lighting.get(field, fallback)
            for field, fallback in field_defaults
        }
    else:
        lighting = None

    description = bible_loc.get("description", "")
    atmosphere = bible_loc.get("atmosphere", "")

    record = {
        "type": setting,
        "habitat_zone": bible_loc.get("habitat_zone", ""),
        "aliases": bible_loc.get("aliases", []),
        # Per-location episodes are not in the bible; the regex pass fills them.
        "episodes": [],
        "episode_count": 0,
        "description_samples": [description],
        "lighting_notes": [atmosphere],
        "lighting_profile": lighting,
        "color_palette": bible_loc.get("color_palette", []),
        "reference_status": "not_started",
        "reference_images": {"wide_establishing": None, "detail_texture": None},
        "prompts": {"reference": None, "flux2": None},
    }
    return record


def derive_prop(prop_id: str, bible_prop: dict) -> dict:
    """Build the breakdown.json record for one GlobalBible prop.

    The display name is the title-cased ``prop_id`` with underscores turned
    into spaces, and the owner is the first associated character (``None``
    when there are none).
    """
    linked_characters = bible_prop.get("associated_characters", [])
    appearances = bible_prop.get("episodes", [])

    label = " ".join(prop_id.split("_")).title()
    if linked_characters:
        owner = linked_characters[0]
    else:
        owner = None

    record = {
        "display_name": label,
        "owner": owner,
        "episodes": appearances,
        "episode_count": len(appearances),
        "description_samples": [bible_prop.get("description", "")],
        "states": ["default"],
        "confidence": "high",
        "reference_status": "not_started",
        "reference_images": {"default": None, "active_state": None},
        "prompts": {"reference": None, "flux2": None},
    }
    return record


def derive_breakdown(bible: dict, project_name: str) -> dict:
    """Translate a whole global_bible.json document into a breakdown.json document.

    Characters, locations and props are converted one by one with the
    per-asset ``derive_*`` functions; lighting motifs are emitted as SFX
    elements. Shot estimates assume 30 shots per episode and 15 shots per
    episode a character appears in. Nothing is marked as locked.
    """
    episode_total = bible.get("total_episodes", 60)
    bible_characters = bible.get("characters", {})
    bible_props = bible.get("props", {})

    characters = {
        cid: derive_character(cid, raw_char)
        for cid, raw_char in bible_characters.items()
    }

    # Attach each prop to the signature_props of every known character it
    # is associated with, without duplicates.
    for pid, raw_prop in bible_props.items():
        for owner_id in raw_prop.get("associated_characters", []):
            if owner_id not in characters:
                continue
            owned = characters[owner_id]["signature_props"]
            if pid not in owned:
                owned.append(pid)

    locations = {
        lid: derive_location(lid, raw_loc)
        for lid, raw_loc in bible.get("locations", {}).items()
    }

    props = {pid: derive_prop(pid, raw_prop) for pid, raw_prop in bible_props.items()}

    # Lighting motifs have no direct counterpart; SFX is the closest mapping.
    sfx_elements = {}
    for motif in bible.get("lighting_motifs", []):
        motif_id = motif["motif_id"]
        sfx_elements[motif_id] = {
            "display_name": motif_id.replace("_", " ").title(),
            "type": "promptable",
            "episodes": [],
            "episode_count": 0,
            "description_samples": [motif.get("description", "")],
            "production_method": "prompt_directly",
        }

    generated_at = datetime.now(timezone.utc).isoformat()

    shots_by_character = {}
    for cid, raw_char in bible_characters.items():
        shots_by_character[cid] = len(raw_char.get("episodes", [])) * 15

    shot_estimates = {
        "total_episodes": episode_total,
        "shots_per_episode": 30,
        "total_shots": episode_total * 30,
        "character_shots": shots_by_character,
        "specialty_shots_count": 0,
    }

    asset_lock_status = {
        "characters_locked": 0,
        "characters_total": len(characters),
        "locations_locked": 0,
        "locations_total": len(locations),
        "props_locked": 0,
        "props_total": len(props),
        "vfx_locked": 0,
        "vfx_total": 0,
    }

    return {
        "version": 1,
        "project": project_name,
        "generated": generated_at,
        "episodes_processed": episode_total,
        "episode_range": [1, episode_total],
        "characters": characters,
        "locations": locations,
        "props": props,
        "sfx_elements": sfx_elements,
        "vfx_elements": {},
        "specialty_shots": [],
        "audio_flags": [],
        "shot_estimates": shot_estimates,
        "asset_lock_status": asset_lock_status,
    }


# Reference statuses that mark an asset as human-curated.
_CURATED_STATUSES = ("in_progress", "locked")


def _carry_over_curated(asset: dict, ex: dict) -> None:
    """Copy reference status/images and saved prompts from *ex* onto *asset*."""
    if ex.get("reference_status") in _CURATED_STATUSES:
        asset["reference_status"] = ex["reference_status"]
        asset["reference_images"] = ex.get("reference_images", asset["reference_images"])
    ex_prompts = ex.get("prompts") or {}
    for prompt_key in ("reference", "flux2"):
        if ex_prompts.get(prompt_key):
            asset["prompts"][prompt_key] = ex_prompts[prompt_key]


def _adopt_wider_episodes(asset: dict, ex: dict) -> bool:
    """Take *ex*'s episode list when it is longer; return True if it was taken."""
    ex_episodes = ex.get("episodes") or []
    if len(ex_episodes) > len(asset.get("episodes") or []):
        asset["episodes"] = ex_episodes
        asset["episode_count"] = len(ex_episodes)
        return True
    return False


def _keep_existing_only(derived_section: dict, existing_section: dict,
                        *, curated_only: bool = False) -> None:
    """Add assets found only in *existing_section*, optionally curated ones only."""
    for asset_id, ex_asset in existing_section.items():
        if asset_id in derived_section:
            continue
        if curated_only and ex_asset.get("reference_status") not in _CURATED_STATUSES:
            continue
        derived_section[asset_id] = ex_asset


def _count_locked(section: dict) -> int:
    """Count the assets in *section* whose reference_status is "locked"."""
    return sum(1 for asset in section.values() if asset.get("reference_status") == "locked")


def merge_breakdown(derived: dict, existing: dict) -> dict:
    """Merge derived breakdown with existing, preserving human-curated data.

    Strategy: UNION merge. Gemini provides structural quality (phases,
    descriptions, state tracking). Regex provides coverage (episode counts,
    dialogue counts, additional assets Gemini hasn't seen).

    For assets present on both sides, the derived record is kept and
    updated with the existing record's reference status and images (only
    when ``in_progress`` or ``locked``), its ``reference`` and ``flux2``
    prompts, and its episode list when that list is longer. Characters and
    locations found only in *existing* are kept. Props found only in
    *existing* are kept only when ``in_progress`` or ``locked``. SFX, VFX,
    specialty shots, audio flags and shot estimates from *existing* win
    when non-empty; derived SFX ids not already present are appended.

    Args:
        derived: Freshly derived breakdown. Mutated in place.
        existing: Previously saved breakdown. Not mutated, although records
            copied into the result are shared with it rather than cloned.

    Returns:
        *derived*, after merging, with ``asset_lock_status`` recalculated.
    """
    for section_name in ("characters", "locations", "props"):
        derived.setdefault(section_name, {})
    ex_characters = existing.get("characters") or {}
    ex_locations = existing.get("locations") or {}
    ex_props = existing.get("props") or {}

    # --- Characters: merge Gemini structure with regex coverage ---
    for char_id, char_data in derived["characters"].items():
        ex = ex_characters.get(char_id)
        if ex is None:
            continue
        _carry_over_curated(char_data, ex)
        for extra_key in ("hero_baseline", "color_palette"):
            if ex.get(extra_key):
                char_data[extra_key] = ex[extra_key]
        # Prefer regex episode coverage. When the existing list is adopted,
        # take its first_appearance too: the derived value was computed from
        # the shorter list (or is the placeholder 1 when that list was
        # empty) and would otherwise disagree with the adopted episodes.
        if _adopt_wider_episodes(char_data, ex) and ex.get("first_appearance"):
            char_data["first_appearance"] = ex["first_appearance"]
        if (ex.get("dialogue_count") or 0) > (char_data.get("dialogue_count") or 0):
            char_data["dialogue_count"] = ex["dialogue_count"]
        if ex.get("first_appearance") and not char_data.get("first_appearance"):
            char_data["first_appearance"] = ex["first_appearance"]
        # Preserve per-wardrobe reference images for phases both sides share.
        ex_wardrobe = ex.get("wardrobe") or {}
        for phase_key, phase_data in (char_data.get("wardrobe") or {}).items():
            ex_phase = ex_wardrobe.get(phase_key)
            if ex_phase and ex_phase.get("reference_images"):
                phase_data["reference_images"] = ex_phase["reference_images"]

    # Characters in existing but NOT in derived (regex found them, Gemini didn't) — keep them
    _keep_existing_only(derived["characters"], ex_characters)

    # --- Locations: merge Gemini structure with regex coverage ---
    for loc_id, loc_data in derived["locations"].items():
        ex = ex_locations.get(loc_id)
        if ex is None:
            continue
        _carry_over_curated(loc_data, ex)
        _adopt_wider_episodes(loc_data, ex)

    # Locations in existing but NOT in derived — keep them
    _keep_existing_only(derived["locations"], ex_locations)

    # --- Props: GEMINI-AUTHORITATIVE merge ---
    # Gemini is the authority on what IS a real prop (filters regex false positives).
    # Regex provides wider episode coverage for Gemini-confirmed props.
    for prop_id, prop_data in derived["props"].items():
        ex = ex_props.get(prop_id)
        if ex is None:
            continue
        _carry_over_curated(prop_data, ex)
        _adopt_wider_episodes(prop_data, ex)

    # Props only in existing are dropped unless human-curated
    # (locked/in_progress); this filters out regex false positives.
    _keep_existing_only(derived["props"], ex_props, curated_only=True)

    # Preserve SFX, VFX, specialty shots, audio flags from existing (regex pass)
    if existing.get("sfx_elements"):
        # Existing SFX first, then any new ids from derived (lighting
        # motifs). Built as a fresh dict so the caller's `existing` is not
        # modified.
        merged_sfx = dict(existing["sfx_elements"])
        for sfx_id, sfx_data in (derived.get("sfx_elements") or {}).items():
            merged_sfx.setdefault(sfx_id, sfx_data)
        derived["sfx_elements"] = merged_sfx
    for carried_key in ("vfx_elements", "specialty_shots", "audio_flags", "shot_estimates"):
        if existing.get(carried_key):
            derived[carried_key] = existing[carried_key]

    # Recalculate lock status
    vfx = derived.get("vfx_elements") or {}
    derived["asset_lock_status"] = {
        "characters_locked": _count_locked(derived["characters"]),
        "characters_total": len(derived["characters"]),
        "locations_locked": _count_locked(derived["locations"]),
        "locations_total": len(derived["locations"]),
        "props_locked": _count_locked(derived["props"]),
        "props_total": len(derived["props"]),
        "vfx_locked": _count_locked(vfx),
        "vfx_total": len(vfx),
    }

    return derived


def main():
    """Command-line entry point: derive breakdown.json and write it to disk.

    Parses the ``bible`` and ``project_dir`` positionals plus the optional
    ``--output`` and ``--merge`` flags, derives the breakdown, merges it
    with the ``--merge`` file when that file exists, saves the result and
    prints a summary to stderr.

    Exits with status 1 if the bible file does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Derive breakdown.json from global_bible.json (pure schema translation)"
    )
    parser.add_argument("bible", type=str, help="Path to global_bible.json")
    parser.add_argument("project_dir", type=str, help="Path to project directory")
    parser.add_argument(
        "--output", type=str, default=None,
        help="Output path (default: project_dir/visual/breakdown.json)"
    )
    parser.add_argument(
        "--merge", type=str, default=None,
        help="Path to existing breakdown.json to merge with (preserves locks)"
    )
    args = parser.parse_args()

    bible_path = Path(args.bible)
    project_dir = Path(args.project_dir)

    if not bible_path.exists():
        print(f"ERROR: Bible not found: {bible_path}", file=sys.stderr)
        sys.exit(1)

    bible = load_json(bible_path)
    # Path(".").name is empty, so fall back to the resolved directory name
    # rather than emitting a blank project name.
    fallback_name = project_dir.name or project_dir.resolve().name
    project_name = bible.get("project", fallback_name)

    # Strip structural_analysis (Gemini CoT) if present
    bible.pop("structural_analysis", None)

    breakdown = derive_breakdown(bible, project_name)

    if args.merge:
        merge_path = Path(args.merge)
        if merge_path.exists():
            existing = load_json(merge_path)
            breakdown = merge_breakdown(breakdown, existing)
            print(f"Merged with existing: {merge_path}", file=sys.stderr)
        else:
            # Still best-effort, but say so: a mistyped --merge path would
            # otherwise write an unmerged breakdown with no indication
            # that nothing was preserved.
            print(
                f"WARNING: --merge file not found, nothing merged: {merge_path}",
                file=sys.stderr,
            )

    output_path = Path(args.output) if args.output else project_dir / "visual" / "breakdown.json"
    save_json(output_path, breakdown)

    # Summary
    print(f"\nDerived breakdown.json from global_bible.json", file=sys.stderr)
    print(f"  Characters: {len(breakdown['characters'])}", file=sys.stderr)
    print(f"  Locations:  {len(breakdown['locations'])}", file=sys.stderr)
    print(f"  Props:      {len(breakdown['props'])}", file=sys.stderr)
    print(f"  SFX:        {len(breakdown['sfx_elements'])}", file=sys.stderr)
    print(f"  Output:     {output_path}", file=sys.stderr)


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
