#!/usr/bin/env python3
"""Project-type namespace for Recoil.

A `Project` is identified by its directory name under `projects_root()`. Its
*project_type* (microdrama | client_deliverable | client_video) determines
which pipeline tools apply. Types are declared in
`projects/{name}/project_config.json`. Missing config defaults to `microdrama`
for backward compatibility — the 13+ existing microdrama projects (Tartarus,
Leviathan, Olympus, etc.) require zero filesystem changes.

Tools should NEVER check `if project.mode == "microdrama"` directly. They
check capability predicates (`uses_pass_naming`, `sweeps_orphans`, etc.)
which insulate callers from type count growth.

Deprecation notice (Build BC Phase 6, 2026-05-09): `project_type` is now the
authoritative field. `mode` is preserved as a derived backward-compat slot
mapping `MICRODRAMA → ProjectMode.MICRODRAMA` and all other types →
`ProjectMode.CLIENT_DELIVERABLE`. Direct reads of `project.mode` are
deprecated; callers should migrate to capability predicates or
`project._project_type_enum`. The `mode` slot will be removed in a future
sprint once all callers are migrated.

Caching (legacy rationale): the original design had no caching. Each
`get_project()` call constructed a fresh `Project`, which read
`project_config.json` once during `__init__`. The reasoning was that the
config is a 50-byte JSON file; reading it on demand is microsecond-fast
and avoids staleness bugs in long-running daemons.

Caching (Phase 2 update): per console-v2-overhaul BUILD_SPEC Phase 2,
`get_project()` is now wrapped in `functools.lru_cache(maxsize=64)` —
per-process, per-slug cached. Project instances expose richer policy
(aspect_ratio, project_type, default_models) and are reused across
callers. The staleness concern is mitigated because policy fields are
versioned (`schema_version`) and migrations are explicit. Callers that
need a fresh read can call `get_project.cache_clear()`.
"""

from __future__ import annotations

import json
import logging
import warnings
from enum import Enum
from functools import lru_cache  # noqa: F401 — used by BUILD_SPEC Phase 2 cache flip
from pathlib import Path
from typing import Any

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    field_validator,
    model_validator,
)

from recoil.core.paths import ProjectPaths, projects_root

__all__ = [
    "ProjectMode",
    "ProjectType",
    "AspectRatio",
    "AspectUnresolvable",
    "ProjectConfig",
    "Project",
    "get_project",
]


log = logging.getLogger(__name__)


# ── New typed exception ─────────────────────────────────────────


class AspectUnresolvable(ValueError):
    """Raised when a project has no resolvable aspect_ratio.

    Subclasses `ValueError`. `Project._migrate_legacy` raises it when
    neither project_config.json nor a global_bible.json yields an aspect.
    The `Project.aspect_ratio`, `Project.project_type` and
    `Project.default_models` accessors raise it when no typed
    `ProjectConfig` could be built at construction time.
    """


# ── Enums ───────────────────────────────────────────────────────


class ProjectMode(str, Enum):
    """Project-type taxonomy for v0 (legacy alias).

    `str, Enum` so that JSON-serialized values compare equal to the raw
    strings, and `ProjectMode.MICRODRAMA == "microdrama"` is True.

    Phase 2 introduces `ProjectType` as the canonical name going forward.
    `ProjectMode` is preserved for backward compatibility — Phase 22
    schedules its deprecation cycle.

    This enum has only two members. `Project.__init__` maps
    `ProjectType.MICRODRAMA` to `MICRODRAMA` and every other project
    type to `CLIENT_DELIVERABLE` when it fills the legacy `mode` slot.
    """

    MICRODRAMA = "microdrama"
    CLIENT_DELIVERABLE = "client_deliverable"


class ProjectType(str, Enum):
    """Canonical project-type taxonomy (Phase 2+).

    A superset of the legacy `ProjectMode` value-space: it shares
    `MICRODRAMA` and `CLIENT_DELIVERABLE` and adds `CLIENT_VIDEO`.
    `ProjectType` is the name new code should reach for. `ProjectMode`
    remains exported for callers that have not yet migrated.
    """

    MICRODRAMA = "microdrama"
    CLIENT_DELIVERABLE = "client_deliverable"
    # CLIENT_VIDEO observed in production configs (driver-beware,
    # client_utilityvideo). BUILD_SPEC §5 listed only the first two; this
    # widening is a deliberate Phase 10 deviation to keep the migration
    # path safe for real projects. Phase 12's dry-run gate would otherwise
    # FAIL on these.
    CLIENT_VIDEO = "client_video"


class AspectRatio(str, Enum):
    """Canonical aspect ratios, stored as underscore-joined strings.

    `str, Enum` so members compare equal to their raw string values.
    Legacy spellings ("9:16", "vertical", ...) are mapped onto these
    values by `_LEGACY_ASPECT_NORMALIZE` before validation.
    """

    NINE_SIXTEEN = "9_16"
    SIXTEEN_NINE = "16_9"
    ONE_ONE = "1_1"
    FOUR_THREE = "4_3"


# Raw string values accepted for the legacy `mode` taxonomy and for the
# canonical `project_type` taxonomy. `Project.__init__` validates the
# on-disk value against `_VALID_TYPES`.
_VALID_MODES = {m.value for m in ProjectMode}
_VALID_TYPES = {t.value for t in ProjectType}


# Legacy / human-friendly aspect spellings mapped to the canonical
# `AspectRatio` values. Consulted by `ProjectConfig._normalize_aspect`
# before enum validation runs.
_LEGACY_ASPECT_NORMALIZE: dict[str, str] = {
    "9:16": "9_16",
    "16:9": "16_9",
    "1:1": "1_1",
    "4:3": "4_3",
    "vertical": "9_16",
    "horizontal": "16_9",
    "square": "1_1",
}


# ── On-disk schema (Pydantic V2) ────────────────────────────────


class ProjectConfig(BaseModel):
    """On-disk schema for projects/{slug}/project_config.json (schema v2).

    Unknown keys are preserved (`extra="allow"`) so richer per-project
    metadata survives a load/dump round trip.

    Two "before" validators clean legacy input prior to field validation:

    * `_adopt_legacy_mode` — a payload that declares its type only via
      the legacy `mode` key gets that value copied into `project_type`,
      so the two can no longer silently disagree.
    * `_normalize_aspect` — legacy aspect spellings ("9:16", "vertical",
      ...) are mapped onto the canonical `AspectRatio` values.
    """

    model_config = ConfigDict(extra="allow")  # forward-compat (Corollary M4)

    schema_version: int = Field(default=2, ge=1)
    project_type: ProjectType = ProjectType.MICRODRAMA
    aspect_ratio: AspectRatio
    default_models: dict[str, str] = Field(default_factory=dict)
    mode: ProjectType | None = None  # legacy alias for project_type

    @model_validator(mode="before")
    @classmethod
    def _adopt_legacy_mode(cls, data: Any) -> Any:
        """Copy legacy `mode` into `project_type` when the latter is absent.

        Uses the same precedence as `Project.__init__`: a truthy
        `project_type` wins and `mode` is only a fallback. Without this,
        a mode-only payload would validate with the `microdrama` default.
        The input mapping is not mutated. Non-dict input is passed
        through for pydantic to handle.
        """
        if (
            isinstance(data, dict)
            and not data.get("project_type")
            and data.get("mode")
        ):
            return {**data, "project_type": data["mode"]}
        return data

    @field_validator("aspect_ratio", mode="before")
    @classmethod
    def _normalize_aspect(cls, v: Any) -> Any:
        """Map legacy aspect spellings onto canonical `AspectRatio` values.

        String input is stripped and lower-cased before lookup so that
        " 9:16 " or "Vertical" resolve like their exact-match forms.
        Strings not in the table are returned in that cleaned form, and
        non-strings unchanged, for the enum validation to accept or
        reject.
        """
        if isinstance(v, str):
            key = v.strip().lower()
            return _LEGACY_ASPECT_NORMALIZE.get(key, key)
        return v


# ── Project class ───────────────────────────────────────────────


class Project:
    """Thin value object for a project's type + capability predicates.

    Construction reads `project_config.json` once (via `_read_config`).
    The capability predicates are `@property` accessors that compare
    against `self._project_type_enum` — no I/O after construction.

    Attributes:
      * `name` — project directory name under `projects_root()`.
      * `slug` — alias of `name`; both hold the same string.
      * `mode` — deprecated legacy slot derived from the project type
        (`MICRODRAMA` stays `ProjectMode.MICRODRAMA`; every other type
        maps to `ProjectMode.CLIENT_DELIVERABLE`).
      * `aspect_synthesized` — True when the typed ProjectConfig was
        built by the legacy migration ladder instead of being loaded
        from a `project_config.json` with `schema_version >= 2` and an
        `aspect_ratio`.
      * `_project_config` — the typed `ProjectConfig`, or `None` when no
        aspect_ratio could be resolved from the config or from a
        global_bible. In the `None` case the `aspect_ratio` /
        `project_type` / `default_models` accessors raise
        `AspectUnresolvable`, deferring the failure from construction
        time to first use — Phase 13 wires this into the API response.
    """

    __slots__ = (
        "name",
        "mode",
        "_config",
        "_project_config",
        "_project_type_enum",
        "aspect_synthesized",
    )

    def __init__(self, name: str):
        """Load the config for project *name* and resolve its type.

        Raises:
            ValueError: if the declared project type is not one of
                `_VALID_TYPES`, or if project_config.json does not
                contain a JSON object. Errors raised while building the
                typed `ProjectConfig` also propagate.
        """
        self.name = name
        self._config = self._read_config()
        # Build B Phase 6 (2026-05-09): project_type is authoritative; mode
        # is preserved as a derived deprecated alias for backward compat.
        pt_str = (
            self._config.get("project_type")
            or self._config.get("mode")
            or ProjectType.MICRODRAMA.value
        )
        # The isinstance guard keeps an unhashable JSON value (list/object)
        # from turning the set-membership test into a TypeError.
        if not isinstance(pt_str, str) or pt_str not in _VALID_TYPES:
            raise ValueError(
                f"Unknown project_type '{pt_str}' in "
                f"projects/{name}/project_config.json. "
                f"Valid types: {sorted(_VALID_TYPES)}"
            )
        self._project_type_enum = ProjectType(pt_str)
        # Set legacy `self.mode` slot for byte-compat with code that reads
        # the attribute directly.
        if self._project_type_enum == ProjectType.MICRODRAMA:
            self.mode = ProjectMode.MICRODRAMA
        else:
            self.mode = ProjectMode.CLIENT_DELIVERABLE
        self.aspect_synthesized = False
        # `_project_config` may be None when no aspect can be resolved.
        # Property accessors raise AspectUnresolvable on access in that
        # case — see escape-hatch note in BUILD_SPEC Phase 2.
        self._project_config = self._load_project_config()

    @property
    def slug(self) -> str:
        """Canonical name; alias of `self.name` for SSOT-style call sites."""
        return self.name

    def _read_config(self) -> dict:
        """Read project_config.json as a raw dict.

        Returns `{"mode": "microdrama"}` (with a warning) when the file
        does not exist, so pre-config projects keep working.

        Raises:
            ValueError: if the file parses but is not a JSON object.
                `json.JSONDecodeError` (a `ValueError`) propagates for
                malformed JSON.
        """
        config_path = self._config_path()
        if not config_path.exists():
            warnings.warn(
                f"No project_config.json for '{self.name}' "
                f"— defaulting to microdrama mode",
                stacklevel=3,
            )
            return {"mode": ProjectMode.MICRODRAMA.value}
        with open(config_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            # Every later step calls `.get()` on this value; fail here
            # with a message that names the file instead of letting an
            # AttributeError surface from `__init__`.
            raise ValueError(
                f"projects/{self.name}/project_config.json must contain "
                f"a JSON object, got {type(data).__name__}"
            )
        return data

    # ── Phase 2: ProjectConfig loading ─────────────────────────────

    def _config_path(self) -> Path:
        """Resolve the on-disk project_config.json path via projects_root SSOT."""
        return projects_root() / self.slug / "project_config.json"

    def _load_project_config(self) -> ProjectConfig | None:
        """Load (and migrate, if needed) the typed ProjectConfig.

        Returns None when no aspect_ratio can be resolved from the
        config or a legacy source — Phase 2 defers the
        AspectUnresolvable raise to property access time so existing
        callers that construct `Project(name)` for legacy purposes keep
        working.

        Reuses `self._config` (already loaded by `_read_config()`) when
        the file exists — avoids a redundant disk read per construction.
        Must run after `self._project_type_enum` is set.
        """
        path = self._config_path()
        if not path.exists():
            # No config on disk — try migration ladder for aspect.
            return self._safe_migrate_legacy()
        raw = self._config
        try:
            schema_v = int(raw.get("schema_version", 1))
        except (TypeError, ValueError):
            # null / non-numeric schema_version: treat as a legacy (v1)
            # config and let the migration ladder rebuild it.
            schema_v = 1
        # Truthiness (not mere key presence) so a null/empty aspect_ratio
        # falls through to the global_bible fallback, matching the
        # `if not aspect` test in `_migrate_legacy`.
        if schema_v >= 2 and raw.get("aspect_ratio"):
            # Pin project_type to the value resolved in __init__. A v2
            # config that declares its type only via the legacy `mode`
            # key would otherwise validate with the `microdrama` default
            # and disagree with the capability predicates.
            return ProjectConfig(
                **{**raw, "project_type": self._project_type_enum.value}
            )
        # schema_version < 2 OR missing aspect_ratio — migrate IN-MEMORY ONLY.
        # Auto-persist was destructive (lost rich on-disk metadata when the
        # migration ladder built a minimal ProjectConfig from 4 fields). Phase
        # 12's `--no-dry-run` is the ONLY supported write path; normal Project
        # construction never modifies on-disk config.
        return self._safe_migrate_legacy(raw)

    def _safe_migrate_legacy(
        self, raw: dict | None = None
    ) -> ProjectConfig | None:
        """Wrap _migrate_legacy, returning None when AspectUnresolvable.

        Phase 2 escape hatch: existing tests (and live projects with no
        config + no global_bible) must keep constructing. The property
        accessors `.aspect_ratio` / `.project_type` / `.default_models`
        raise AspectUnresolvable on access in the None-sentinel case.
        """
        try:
            return self._migrate_legacy(raw)
        except AspectUnresolvable:
            self.aspect_synthesized = False
            return None

    def _migrate_legacy(self, raw: dict | None = None) -> ProjectConfig:
        """Build a schema-v2 ProjectConfig from legacy sources.

        The aspect comes from `raw["aspect_ratio"]` when present,
        otherwise from the first global_bible.json that yields an
        `aspect` / `aspect_ratio` value. Sets `aspect_synthesized` to
        True on success.

        Raises:
            AspectUnresolvable: if no source provides an aspect.
        """
        raw = raw or {}
        aspect: str | None = raw.get("aspect_ratio")
        if not aspect:
            # Try global_bible.json as a derivation source. Check both
            # the canonical location (state/visual/global_bible.json per
            # pipeline/CLAUDE.md) and the legacy top-level location.
            root = projects_root()
            for bible_path in (
                ProjectPaths.for_project(self.slug).global_bible_path,
                root / self.slug / "global_bible.json",
            ):
                if not bible_path.exists():
                    continue
                try:
                    bible = json.loads(
                        bible_path.read_text(encoding="utf-8")
                    )
                except (OSError, ValueError):
                    # Unreadable, non-UTF-8 or malformed bible: skip it
                    # and try the next candidate. JSONDecodeError and
                    # UnicodeDecodeError are both ValueError subclasses.
                    continue
                if not isinstance(bible, dict):
                    # Valid JSON but not an object — nothing to look up.
                    continue
                aspect = bible.get("aspect") or bible.get("aspect_ratio")
                if aspect:
                    break
        if not aspect:
            raise AspectUnresolvable(
                f"Project {self.slug!r}: no aspect_ratio in "
                f"project_config.json and no aspect in global_bible.json. "
                f"Backfill required."
            )
        self.aspect_synthesized = True
        log.warning(
            "project %s: aspect_ratio migrated from legacy source — "
            "backfill project_config.json",
            self.slug,
        )
        project_type_raw = (
            raw.get("project_type")
            or raw.get("mode")
            or "microdrama"
        )
        # Build the migrated config by MERGING the raw on-disk dict with the
        # computed canonical fields. ProjectConfig's `extra='allow'` would
        # preserve extras IF we passed them in — building from `**raw` first
        # then overriding the canonical fields keeps rich metadata
        # (style notes, camera body, song info, etc.) on disk if Phase 12's
        # `--no-dry-run` later persists this.
        merged = {
            **raw,
            "schema_version": 2,
            "project_type": project_type_raw,
            "aspect_ratio": aspect,
            "default_models": raw.get("default_models", {}),
        }
        return ProjectConfig(**merged)

    def _persist_migrated(self, cfg: ProjectConfig) -> None:
        """Write *cfg* to project_config.json atomically.

        Not called during normal construction: `_load_project_config`
        migrates in memory only.
        NOTE(review): no caller is visible in this module — confirm that
        Phase 12's `--no-dry-run` path is the one that uses it.

        Writer preference order: the Phase-20 `atomic_write_json`, then
        the sidecar's locked variant, then a local tempfile +
        `os.replace` fallback when neither import is available.
        """
        path = self._config_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        data = cfg.model_dump(mode="json", exclude_none=True)
        try:
            # Try canonical Phase-20 location first.
            from recoil.pipeline.core.persistence import (  # type: ignore
                atomic_write_json,
            )
            atomic_write_json(path, data)
            return
        except ImportError:
            pass
        try:
            # Try sidecar's locked variant (current de-facto SSOT).
            from recoil.workspace.sidecar import (  # type: ignore
                _atomic_write_json_locked as _aw,
            )
            _aw(path, data)
            return
        except ImportError:
            pass
        # Local fallback: tempfile + os.replace.
        import os
        import tempfile

        # The temp file lives in the target directory so os.replace stays
        # on one filesystem.
        fd, tmp = tempfile.mkstemp(
            prefix=path.name + ".",
            dir=str(path.parent),
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            os.replace(tmp, path)
        except Exception:
            # Best-effort cleanup of the temp file, then re-raise the
            # original error.
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise

    # ── Capability predicates ──────────────────────────────────────

    @property
    def uses_pass_naming(self) -> bool:
        """Whether the project uses SH-canonical coverage-pass filenames."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    @property
    def sweeps_orphans(self) -> bool:
        """Whether _sweep_orphans should run on this project."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    @property
    def uses_pass_store(self) -> bool:
        """Whether PassStore is the source of truth for this project."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    @property
    def auto_extracts_segments(self) -> bool:
        """Whether _auto_extract_coverage_segments should run."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    @property
    def captures_verdicts(self) -> bool:
        """Whether approve/reject verdicts are recorded for this project."""
        return self._project_type_enum in (
            ProjectType.MICRODRAMA,
            ProjectType.CLIENT_DELIVERABLE,
            ProjectType.CLIENT_VIDEO,
        )

    @property
    def has_take_semantics(self) -> bool:
        """Whether '_take{N}' is a structured suffix or part of the name."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    @property
    def ui_grouping_strategy(self) -> str:
        """Workspace UI grouping mode: 'pass_anchors' or 'flat'."""
        if self._project_type_enum == ProjectType.MICRODRAMA:
            return "pass_anchors"
        return "flat"

    @property
    def is_client_deliverable(self) -> bool:
        """True when the project is any client deliverable (not microdrama)."""
        return self._project_type_enum in (
            ProjectType.CLIENT_VIDEO,
            ProjectType.CLIENT_DELIVERABLE,
        )

    @property
    def supports_episodes(self) -> bool:
        """True when this project organizes shots under episodes."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    @property
    def expects_beat_structure(self) -> bool:
        """True when the project expects beat-anchored shot organization."""
        return self._project_type_enum == ProjectType.MICRODRAMA

    # ── Phase 2: ProjectConfig accessors ───────────────────────────

    @property
    def aspect_ratio(self) -> str:
        """Canonical aspect-ratio string ('9_16' / '16_9' / '1_1' / '4_3').

        Raises AspectUnresolvable if no config / global_bible source
        exists. Phase 13 surfaces this via the API response with
        `aspect_synthesized` flagging.
        """
        if self._project_config is None:
            raise AspectUnresolvable(
                f"Project {self.slug!r}: no aspect_ratio in "
                f"project_config.json and no aspect in global_bible.json. "
                f"Backfill required."
            )
        return self._project_config.aspect_ratio.value

    @property
    def project_type(self) -> str:
        """Canonical project-type string ('microdrama' / 'client_deliverable' / 'client_video').

        Raises AspectUnresolvable when no typed ProjectConfig could be
        built for this project.
        """
        if self._project_config is None:
            raise AspectUnresolvable(
                f"Project {self.slug!r}: ProjectConfig unresolvable; "
                f"backfill project_config.json."
            )
        return self._project_config.project_type.value

    @property
    def default_models(self) -> dict[str, str]:
        """Per-modality default model ids declared in project_config.json.

        Returns a shallow copy: `get_project()` shares one cached
        `Project` across callers, so handing out the internal dict would
        let one caller's mutation leak to every other caller.

        Raises AspectUnresolvable when no typed ProjectConfig could be
        built for this project.
        """
        if self._project_config is None:
            raise AspectUnresolvable(
                f"Project {self.slug!r}: ProjectConfig unresolvable; "
                f"backfill project_config.json."
            )
        return dict(self._project_config.default_models)


@lru_cache(maxsize=64)
def get_project(name: str) -> Project:
    """Cached accessor — Project instances are reusable per-slug.

    Per BUILD_SPEC Phase 2 (console-v2-overhaul), this is now
    `functools.lru_cache(maxsize=64)`-wrapped: repeated calls for the
    same slug return the same `Project` instance. Long-running daemons
    that need to pick up an on-disk config edit must call
    `get_project.cache_clear()` explicitly.

    Args:
        name: project directory name under `projects_root()`.

    Returns:
        The `Project` for *name*, shared by every caller in this
        process until the cache entry is evicted or cleared.

    Raises:
        ValueError: propagated from `Project(name)`, e.g. for an
            unknown project type.
    """
    return Project(name)
