#!/usr/bin/env python3
"""Generate derived sublocation refs and reconcile ``base/location.json``.

The bible owns sublocation names and semantic descriptions. This tool derives
visual refs from the highest-version master location plate and is the sole
writer of the sublocation registry after the 2026-06-11 retirement of
``migrate_sublocations.py``. Registry ``source_sha256`` is the sha256 of the
bible description string after UTF-8 encoding and ``.strip()`` only; plate
hashes live only in ref sidecars.
"""

from __future__ import annotations

import argparse
import hashlib
import json
import re
import shutil
import sys
from pathlib import Path
from typing import Any

# Allow `import recoil...` without an editable install (repo root = 4 up).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent))

from recoil.core.atomic_write import atomic_write_json  # noqa: E402
from recoil.core.model_profiles import get_model  # noqa: E402
from recoil.core.paths import ProjectPaths, projects_root  # noqa: E402
from recoil.execution.providers.fal import _GPT_IMAGE_2_TARIFF_USD  # noqa: E402
from recoil.pipeline._lib.sidecar import compute_sha256  # noqa: E402
from recoil.pipeline._lib.sublocation_registry import validate_ref_file  # noqa: E402


# Version tag appended to every generation prompt and recorded in each ref
# sidecar under ``derivation.prompt_version``.
PROMPT_VERSION = "sublocation_ref_v02"
# Image model named in the dispatch payload and in each ref sidecar.
MODEL = "gpt-image-2"
# Aspect ratio and explicit output size sent with every dispatch payload.
ASPECT_RATIO = "9:16"
SIZE_OVERRIDE = "2160x3840"
# ``schema_version`` stamped into every registry payload this tool builds.
REGISTRY_SCHEMA_VERSION = 1


class GenSublocationsError(RuntimeError):
    """Raised when the sublocation ref generator cannot proceed.

    ``main`` catches this type, prints the message to stderr, and returns 1.
    """


def bible_desc_sha256(desc: str) -> str:
    """Return the hex sha256 of a bible description normalized by ``.strip()`` only.

    The value is coerced with ``str()``, stripped of leading/trailing
    whitespace, UTF-8 encoded, and hashed; no other normalization is applied.
    """

    normalized = str(desc).strip()
    digest = hashlib.sha256()
    digest.update(normalized.encode("utf-8"))
    return digest.hexdigest()


def generate_sublocations(
    project: str,
    location_id: str,
    *,
    probe: bool = False,
    dry_run: bool = False,
    adjacency_confirm: bool = False,
    restamp: bool = False,
    quality: str = "high",
) -> dict[str, Any]:
    """Generate missing sublocation refs, then reconcile ``location.json``.

    Args:
        project: Project name used to resolve ``ProjectPaths``.
        location_id: Key of the location inside the bible's ``locations`` map.
        probe: Generate at most the first missing ref and never touch the
            registry. Cannot be combined with ``restamp``.
        dry_run: Print what would be generated; nothing is generated and the
            registry is not written.
        adjacency_confirm: Forwarded to the registry builder, which decides
            whether a proposed adjacency graph goes into the registry.
        restamp: Skip ref generation and only rebuild the registry.
        quality: Quality tier forwarded to generation and the cost estimate.

    Returns:
        A summary dict with ``generated``, ``failed`` and ``registry_written``.
        Outside probe mode it also carries ``registry`` (the payload that was
        built) and ``adjacency_proposal`` (side-file path or ``None``).

    Raises:
        GenSublocationsError: On unusable bible/registry/plate inputs or when
            ``probe`` and ``restamp`` are both set.
    """

    project_paths = ProjectPaths.for_project(project)
    bible_sublocs = _bible_sublocations(
        _read_json_object(project_paths.global_bible_path),
        location_id,
    )
    sheet_path, sheet_version = _highest_master_plate(project_paths, location_id)
    base_dir = project_paths.asset_look_dir("loc", location_id, "base")
    subloc_dir = base_dir / "sublocations"
    registry_path = base_dir / "location.json"
    registry = _load_registry(registry_path)
    _enforce_registry_subset(registry, bible_sublocs)

    if probe and restamp:
        raise GenSublocationsError("--probe and --restamp cannot be combined")

    had_registry = registry is not None
    plate_sha = compute_sha256(sheet_path)
    generated: list[str] = []
    failed: list[dict[str, str]] = []

    def attempt(target: str) -> None:
        # Generate one ref and file the outcome under generated/failed.
        outcome = _generate_one(
            project,
            location_id,
            target,
            str(bible_sublocs[target].get("description", "")),
            sheet_path,
            sheet_version,
            plate_sha,
            subloc_dir,
            quality,
        )
        if outcome.get("error"):
            failed.append({"sublocation": target, "error": outcome["error"]})
        else:
            generated.append(target)

    # A sublocation is "missing" when no ref on disk passes validation.
    missing: list[str] = []
    for candidate in bible_sublocs:
        if _latest_valid_ref(subloc_dir, candidate) is None:
            missing.append(candidate)

    if probe:
        # Probe mode: at most one real generation, registry left untouched.
        if not missing:
            print("probe: no missing sublocation refs")
            _print_tariff_estimate(quality, 0)
        elif dry_run:
            print(f"dry-run probe would generate: {missing[0]}")
            _print_tariff_estimate(quality, 1)
        else:
            attempt(missing[0])
            _print_tariff_estimate(quality, 1)
            print("probe output is a real ref; no registry entry written")
        return {"generated": generated, "failed": failed, "registry_written": False}

    if not restamp:
        for target in missing:
            if dry_run:
                print(f"dry-run would generate: {target}")
            else:
                attempt(target)

    registry_payload, proposal_path = _build_reconciled_registry(
        project,
        location_id,
        bible_sublocs,
        base_dir,
        registry,
        adjacency_confirm=adjacency_confirm,
        write_proposal=not dry_run,
    )

    def summary(written: bool) -> dict[str, Any]:
        # Shared result shape for every non-probe exit.
        return {
            "generated": generated,
            "failed": failed,
            "registry_written": written,
            "registry": registry_payload,
            "adjacency_proposal": proposal_path,
        }

    if dry_run:
        print("dry-run: registry not written")
        return summary(False)

    # Fail-open guard: a registry is never CREATED with an empty sublocations
    # map. A missing location.json means "undecomposed, everything permitted";
    # an empty-but-present one would mean "decomposed, nothing allowed", which
    # is a fail-closed regression when every generation of a fresh run failed.
    if not (registry_path.exists() or registry_payload.get("sublocations")):
        print("no valid refs landed and no existing registry — leaving location undecomposed")
        return summary(False)

    # Writing always re-stamps source hashes to the bible description hashes,
    # so an existing registry is reconciled automatically here.
    atomic_write_json(registry_path, registry_payload, indent=2)
    print(f"wrote registry: {registry_path}")
    if had_registry:
        print("reconciled existing registry source_sha256 to bible description hashes")
    return summary(True)


def _generate_one(
    project: str,
    location_id: str,
    name: str,
    description: str,
    sheet_path: Path,
    sheet_version: int,
    plate_sha: str,
    subloc_dir: Path,
    quality: str,
) -> dict[str, str]:
    """Generate one sublocation ref, validate it, and write its sidecar.

    Returns ``{"path": ...}`` on success, or ``{"error": ...}`` when the copied
    image fails ``validate_ref_file`` (no sidecar is written in that case).
    Errors raised by the dispatch itself propagate to the caller.
    """
    target = _next_ref_path(subloc_dir, name)
    prompt = _build_prompt(location_id, name, description)
    rendered = _dispatch_sublocation_ref(
        project, location_id, name, prompt, quality, [sheet_path]
    )
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(rendered, target)

    problem = validate_ref_file(target)
    if problem:
        print(f"validation failed for {name}: {problem}", file=sys.stderr)
        return {"error": problem}

    # The sidecar carries the plate hash (source_sha256) and the image's own
    # hash; the registry only ever stores the bible description hash.
    sidecar = {
        "kind": "location_sublocation_ref",
        "sublocation": name,
        "source_asset": f"sheets/sheet_v{sheet_version}.png",
        "source_sha256": plate_sha,
        "sha256": compute_sha256(target),
        "bible_desc_sha256": bible_desc_sha256(description),
        "derivation": {
            "model": MODEL,
            "method": "image_edit",
            "prompt_version": PROMPT_VERSION,
        },
    }
    # Sidecar sits next to the image as "<image name>.json".
    sidecar_path = target.with_name(target.name + ".json")
    atomic_write_json(sidecar_path, sidecar, indent=2)
    print(f"generated {name}: {target}")
    return {"path": str(target)}


def _dispatch_sublocation_ref(
    project: str,
    location_id: str,
    name: str,
    prompt: str,
    quality: str,
    reference_images: list[Path],
) -> Path:
    """Fire one sync-polled gpt-image-2 edit, matching generate_composite_sheet.

    Returns the path of the produced image, resolved against the project root
    when the dispatcher reports a relative path.

    Raises:
        GenSublocationsError: If the run fails, reports no output path, or the
            output file is missing or empty.
    """

    # Imported lazily, as in the original layout of this function.
    from recoil.execution.execution_store import ExecutionStore
    from recoil.execution.step_runner import StepRunner
    from recoil.execution.step_types import ProjectPaths as EpisodeProjectPaths
    from recoil.pipeline.core import DispatchContext, dispatch

    store_location = Path(f"/tmp/subloc_gen_store_{project}_{location_id}_{name}")
    runner = StepRunner(
        store=ExecutionStore(project=project, db_path=store_location),
        paths=EpisodeProjectPaths.for_episode(project, 1),
    )
    context = DispatchContext(
        caller_id=f"gen_sublocations:{location_id}:{name}",
        step_runner=runner,
        project=project,
        episode=1,
        receipts_log_path="DISABLED",
    )
    request = {
        "shot_id": f"subloc_{location_id}_{name}",
        "model": MODEL,
        "prompt": prompt,
        "aspect_ratio": ASPECT_RATIO,
        "quality": quality,
        "reference_images": list(map(str, reference_images)),
        "size_override": SIZE_OVERRIDE,
    }
    outcome = dispatch("image_t2i", request, context=context).run_result

    if not outcome.success:
        raise GenSublocationsError(f"sublocation ref generation failed: {outcome.error}")
    if not outcome.output_path:
        raise GenSublocationsError("sublocation ref generation returned no output_path")

    produced = Path(outcome.output_path)
    if not produced.is_absolute():
        # Relative outputs are interpreted relative to the project directory.
        produced = projects_root() / project / str(outcome.output_path)
    if not (produced.exists() and produced.stat().st_size != 0):
        raise GenSublocationsError(f"sublocation ref output missing at {produced}")
    return produced


def _registry_payload(
    location_id: str,
    sublocations: dict[str, dict[str, str]],
    adjacency: list,
) -> dict:
    """Assemble the registry document with its fixed key order."""
    return {
        "schema_version": REGISTRY_SCHEMA_VERSION,
        "location_id": location_id,
        "sublocations": sublocations,
        "adjacency": adjacency,
    }


def _build_reconciled_registry(
    project: str,
    location_id: str,
    bible_sublocs: dict[str, dict],
    base_dir: Path,
    registry: dict | None,
    *,
    adjacency_confirm: bool,
    write_proposal: bool,
) -> tuple[dict, Path | None]:
    """Build the registry payload from the bible and the refs on disk.

    Every bible sublocation with a valid ref gets an entry whose
    ``source_sha256`` is the bible description hash; sublocations without a
    valid ref are skipped with a note on stderr.

    Args:
        project: Project name, forwarded to the adjacency proposal.
        location_id: Location key written into the payload.
        bible_sublocs: Bible sublocation specs keyed by name.
        base_dir: Directory that holds ``location.json``; refs are stored
            relative to it.
        registry: The existing registry, or ``None`` on a first run.
        adjacency_confirm: Put a proposed adjacency graph INTO the payload
            instead of only saving it as a side file.
        write_proposal: ``False`` for dry runs: no model call, no side file.

    Returns:
        ``(payload, proposal_path)`` where ``proposal_path`` is the written
        ``adjacency_proposal.json`` or ``None`` when no side file was written.

    Raises:
        Exception: Whatever the adjacency proposal raised, when the proposal
            was required (``adjacency_confirm``) and failed.
    """
    entries: dict[str, dict[str, str]] = {}
    for name, spec in bible_sublocs.items():
        desc = str(spec.get("description", ""))
        ref_path = _preferred_registry_ref(base_dir, registry, name)
        if ref_path is None:
            print(f"no valid ref for {name}; skipping registry entry", file=sys.stderr)
            continue
        rel = ref_path.relative_to(base_dir).as_posix()
        entries[name] = {"ref": rel, "source_sha256": bible_desc_sha256(desc)}

    if registry is not None:
        adjacency = registry.get("adjacency") or []
        if adjacency_confirm and not adjacency and write_proposal:
            # Recovery path: a prior run wrote the registry with empty adjacency
            # (advisory proposal failed). --adjacency-confirm must be able to
            # recompute and write the graph for an existing empty registry,
            # otherwise the location is stuck at adjacency=[] forever.
            adjacency = _propose_adjacency_pairs(project, location_id, bible_sublocs)
        return _registry_payload(location_id, entries, adjacency), None

    if not write_proposal:
        # dry-run: zero network — no LLM adjacency proposal, no spend,
        # no credentials required.
        return _registry_payload(location_id, entries, []), None

    try:
        proposed = _propose_adjacency_pairs(project, location_id, bible_sublocs)
    except Exception as exc:  # noqa: BLE001 — the proposal is advisory
        if adjacency_confirm:
            # --adjacency-confirm writes the graph INTO the registry; a
            # failed proposal is a hard error there (nothing to confirm).
            raise
        # Without --adjacency-confirm the registry takes adjacency=[]
        # regardless (the proposal only seeds a side file), so a transient
        # model failure must NOT abort the no-LLM sublocations map write.
        print(
            f"adjacency proposal skipped ({type(exc).__name__}); registry "
            "written with empty adjacency — re-run with --adjacency-confirm "
            "once the model is reachable",
            file=sys.stderr,
        )
        return _registry_payload(location_id, entries, []), None

    rendered = json.dumps(proposed, separators=(',', ':'))
    if adjacency_confirm:
        # The graph goes straight into the payload, so the "re-run" hint would
        # be misleading here.
        print(f"proposed adjacency: {rendered} — confirmed by --adjacency-confirm")
        return _registry_payload(location_id, entries, proposed), None

    # Unconfirmed: the registry keeps adjacency=[] and the proposal is saved as
    # a side file. write_proposal is always true on this path because the
    # dry-run case returned above.
    proposal_path = base_dir / "adjacency_proposal.json"
    atomic_write_json(proposal_path, {"adjacency": proposed}, indent=2)
    print(
        "proposed adjacency: "
        f"{rendered} — "
        "re-run with --adjacency-confirm to write"
    )
    return _registry_payload(location_id, entries, []), proposal_path


def _preferred_registry_ref(base_dir: Path, registry: dict | None, name: str) -> Path | None:
    """Pick the ref for ``name``: the registry's own ref if it still validates.

    When the registry has no ref for ``name``, or that ref fails validation
    (reported on stderr), fall back to the highest-version valid ref found
    under ``base_dir / "sublocations"``. Returns ``None`` if none exists.
    """
    recorded = None
    if registry:
        known = registry.get("sublocations") or {}
        recorded = (known.get(name) or {}).get("ref")

    if recorded:
        pinned = base_dir / recorded
        if validate_ref_file(pinned) is None:
            return pinned
        print(f"registry ref for {name} failed validation: {pinned}", file=sys.stderr)

    return _latest_valid_ref(base_dir / "sublocations", name)


def _build_prompt(location_id: str, name: str, description: str) -> str:
    """Compose the image-edit prompt for one sublocation.

    The prompt is a fixed instruction paragraph, a blank line, then one
    ``key: value`` line each for the location id, sublocation name, stripped
    bible description, lineage note, and ``PROMPT_VERSION``.
    """
    brief = (
        "Derive a vertical 9:16 establishing reference image for one named "
        "sublocation inside the provided master location plate. Preserve the "
        "location's architecture, materials, lighting logic, scale, and visual "
        "continuity from the reference image. Do not invent a new setting. "
        "Focus the composition on this area while keeping it production-useful "
        "as a clean environment reference."
    )
    lineage = (
        "S-1 derivation lineage: this sublocation name and description were "
        "approved through the Gate A bible flow and must be treated as the "
        "semantic source of truth."
    )
    fields = [
        f"Location id: {location_id}",
        f"Sublocation: {name}",
        f"Bible description: {description.strip()}",
        lineage,
        f"prompt_version: {PROMPT_VERSION}",
    ]
    return brief + "\n\n" + "\n".join(fields)


def _propose_adjacency_pairs(
    project: str,
    location_id: str,
    bible_sublocs: dict[str, dict],
) -> list[list[str]]:
    """Ask the text model for adjacency pairs and return the validated list.

    The request is a JSON document holding the project, location id, stripped
    sublocation descriptions and the instruction; the reply is parsed and
    restricted to the bible's sublocation keys by ``_parse_adjacency``.
    """
    model = get_model("prose_author", "text")

    descriptions: dict[str, str] = {}
    for key, spec in bible_sublocs.items():
        descriptions[key] = str(spec.get("description", "")).strip()

    request = {
        "project": project,
        "location_id": location_id,
        "sublocations": descriptions,
        "instruction": (
            'Return strict JSON only: {"adjacency": [[a,b], ...]}. '
            "Only include unordered physical adjacency pairs between listed keys."
        ),
    }
    reply = _call_adjacency_model(model, json.dumps(request, indent=2, sort_keys=True))
    return _parse_adjacency(reply, set(bible_sublocs))


def _call_adjacency_model(model: str, prompt: str) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Truthy ``text`` attributes of the response's content blocks are joined
    with newlines; blocks without text are ignored.
    """
    from recoil.core.anthropic_client import anthropic_client

    # No sampling params: adaptive-thinking models reject temperature/top_p.
    reply = anthropic_client().messages.create(
        model=model,
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}],
    )
    blocks = getattr(reply, "content", []) or []
    texts = (getattr(block, "text", None) for block in blocks)
    return "\n".join(text for text in texts if text)


def _parse_adjacency(raw: str, allowed: set[str]) -> list[list[str]]:
    """Parse a model reply into a de-duplicated list of adjacency pairs.

    Accepts either ``{"adjacency": [[a, b], ...]}`` or a bare list of pairs.
    Self-pairs and repeats of an unordered pair are dropped silently; the
    first spelling of each pair is kept, in reply order.

    Raises:
        GenSublocationsError: If the reply is not JSON, is not a list of
            two-item lists, or names a key outside ``allowed``.
    """
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise GenSublocationsError("adjacency proposal returned invalid JSON") from exc

    if isinstance(decoded, dict):
        candidates = decoded.get("adjacency")
    else:
        candidates = decoded
    if not isinstance(candidates, list):
        raise GenSublocationsError("adjacency proposal must be a list")

    accepted: list[list[str]] = []
    seen_edges: set[frozenset[str]] = set()
    for candidate in candidates:
        well_formed = isinstance(candidate, list) and len(candidate) == 2
        if not well_formed:
            raise GenSublocationsError(f"invalid adjacency pair: {candidate!r}")
        left, right = (str(member) for member in candidate)
        if not (left in allowed and right in allowed):
            raise GenSublocationsError(f"adjacency pair uses unknown key: {candidate!r}")
        if left == right:
            continue
        # Unordered identity: [a, b] and [b, a] are the same edge.
        edge = frozenset((left, right))
        if edge in seen_edges:
            continue
        seen_edges.add(edge)
        accepted.append([left, right])
    return accepted


def _print_tariff_estimate(quality: str, count: int) -> None:
    """Print the tariff-based cost estimate for ``count`` images.

    The per-image price is looked up for ``quality`` in the (3840, 2160) row
    of the tariff table; an unknown quality falls back to the ``"high"`` price.
    """
    price_by_quality = _GPT_IMAGE_2_TARIFF_USD[(3840, 2160)]
    unit_price = price_by_quality.get(quality)
    if unit_price is None:
        unit_price = price_by_quality["high"]
    total = unit_price * count
    print(
        f"tariff-estimated gpt-image-2 4K {quality} cost: "
        f"${unit_price:.2f} each x {count} = ${total:.2f}"
    )
    for notice in (
        "Cost is tariff-ESTIMATED; fal exposes no billing for this endpoint.",
        "verify actual spend on the fal dashboard before the full run",
    ):
        print(notice)


def _read_json_object(path: Path) -> dict:
    """Load ``path`` as UTF-8 JSON and require a top-level object.

    Args:
        path: File to read.

    Returns:
        The decoded JSON object.

    Raises:
        GenSublocationsError: If the file cannot be read, is not valid UTF-8,
            is not valid JSON, or its top-level value is not an object.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except OSError as exc:
        raise GenSublocationsError(f"cannot read JSON file: {path}") from exc
    except UnicodeDecodeError as exc:
        # Not an OSError and not a JSONDecodeError: without this branch a
        # mis-encoded file escapes as a raw traceback instead of the tool's
        # own error type.
        raise GenSublocationsError(f"JSON file is not valid UTF-8: {path}") from exc

    try:
        data = json.loads(text)
    except json.JSONDecodeError as exc:
        raise GenSublocationsError(f"invalid JSON file: {path}") from exc
    if not isinstance(data, dict):
        raise GenSublocationsError(f"expected JSON object: {path}")
    return data


def _load_registry(path: Path) -> dict | None:
    if not path.is_file():
        return None
    return _read_json_object(path)


def _bible_sublocations(bible: dict, location_id: str) -> dict[str, dict]:
    """Return the bible's sublocations block for ``location_id`` after validation.

    Raises:
        GenSublocationsError: If the location has no sublocations block, the
            block is not an object, or any entry lacks a non-blank
            ``description``.
    """
    locations = bible.get("locations") or {}
    entry = locations.get(location_id)
    sublocs = entry.get("sublocations") if isinstance(entry, dict) else None

    if not sublocs:
        raise GenSublocationsError(
            "location has no bible sublocations block — author it via the Gate A flow first"
        )
    if not isinstance(sublocs, dict):
        raise GenSublocationsError("bible sublocations block must be an object")

    for name, spec in sublocs.items():
        described = isinstance(spec, dict) and str(spec.get("description", "")).strip()
        if not described:
            raise GenSublocationsError(
                f"bible sublocation {name!r} must contain a non-empty description"
            )
    return sublocs


def _highest_master_plate(paths: ProjectPaths, location_id: str) -> tuple[Path, int]:
    """Return ``(path, version)`` of the highest-numbered ``sheet_vN.png``.

    Raises:
        GenSublocationsError: If the location's sheets directory holds no file
            named ``sheet_v<digits>.png``.
    """
    sheets_dir = paths.get_location_sheets_dir(location_id)
    name_pattern = re.compile(r"^sheet_v(\d+)\.png$")

    plates: list[tuple[int, Path]] = []
    for candidate in sheets_dir.glob("sheet_v*.png"):
        hit = name_pattern.match(candidate.name)
        if hit:
            plates.append((int(hit.group(1)), candidate))

    # max() keeps the first of several equal versions, like a strict ">" scan.
    winner = max(plates, key=lambda plate: plate[0], default=None)
    if winner is None:
        raise GenSublocationsError(f"no master plate found in {sheets_dir}")
    version, plate_path = winner
    return plate_path, version


def _latest_valid_ref(subloc_dir: Path, name: str) -> Path | None:
    best: tuple[int, Path] | None = None
    for path in subloc_dir.glob(f"sublocation_{name}_v*.png"):
        version = _ref_version(path, name)
        if version is None:
            continue
        if validate_ref_file(path) is not None:
            continue
        if best is None or version > best[0]:
            best = (version, path)
    return best[1] if best else None


def _next_ref_path(subloc_dir: Path, name: str) -> Path:
    max_version = 0
    for path in subloc_dir.glob(f"sublocation_{name}_v*.png"):
        version = _ref_version(path, name)
        if version is not None:
            max_version = max(max_version, version)
    return subloc_dir / f"sublocation_{name}_v{max_version + 1:02d}.png"


def _ref_version(path: Path, name: str) -> int | None:
    match = re.match(rf"^sublocation_{re.escape(name)}_v(\d+)\.png$", path.name)
    return int(match.group(1)) if match else None


def _enforce_registry_subset(registry: dict | None, bible_sublocs: dict[str, dict]) -> None:
    if not registry:
        return
    unknown = sorted(set((registry.get("sublocations") or {})) - set(bible_sublocs))
    if unknown:
        raise GenSublocationsError(
            "registry contains sublocations absent from bible: " + ", ".join(unknown)
        )


def main(argv: list[str] | None = None) -> int:
    """Parse CLI arguments and run the generator.

    Returns 0 when ``generate_sublocations`` completes, or 1 after printing
    the message of a ``GenSublocationsError`` to stderr.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--project", required=True)
    parser.add_argument("--location", required=True, dest="location_id")
    for switch in ("--probe", "--dry-run", "--adjacency-confirm", "--restamp"):
        parser.add_argument(switch, action="store_true")
    parser.add_argument("--quality", default="high", choices=["low", "medium", "high"])
    options = parser.parse_args(argv)

    # Keyword options map one-to-one onto generate_sublocations parameters.
    keyword_options = {
        option: getattr(options, option)
        for option in ("probe", "dry_run", "adjacency_confirm", "restamp", "quality")
    }
    try:
        generate_sublocations(options.project, options.location_id, **keyword_options)
    except GenSublocationsError as exc:
        print(f"gen_sublocations: ERROR: {exc}", file=sys.stderr)
        return 1
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
