"""
taxonomy.py — Asset naming convention, type taxonomy, and slot mapping.

Single source of truth for the 6 canonical asset types, the naming regex,
filename parsing, and pipeline slot allocation rules.

Human-readable documentation: ASSET_TAXONOMY.md
Numeric constants (weights, slots): CONSTANTS.md § Asset Taxonomy & Pipeline Allocation

Naming format:  {subject}_{type}_{variant}_v{NN}.{ext}
  - subject:  lowercase alphanumeric + hyphens (torch, int-lower-decks, old-man)
  - type:     one of 6 enum values (identity, turn, expr, loc, prop, scene)
  - variant:  lowercase alphanumeric + hyphens (hero, front, wide, pipe-detail)
  - version:  zero-padded 2-digit integer (v01, v02, v99)
  - ext:      png | jpeg | webp  (.jpg normalized to .jpeg on ingest)
"""

import re
from dataclasses import dataclass
from pathlib import Path

# ======================================================================
# Asset Type Enum (the 6 canonical types)
# ======================================================================

# The six type tokens accepted in the {type} filename segment. Immutable so it
# can be shared freely; build_asset_filename() rejects any type not listed here.
VALID_TYPES: frozenset[str] = frozenset({"identity", "turn", "expr", "loc", "prop", "scene"})

# ======================================================================
# Extension Normalization
# ======================================================================

# Non-canonical extension -> canonical spelling. Keys are lowercase and include
# the leading dot; they are compared against the lowercased filename.
EXTENSION_NORMALIZE = {
    ".jpg": ".jpeg",
    ".tif": ".tiff",
}


def normalize_extension(filename: str) -> str:
    """Rewrite a non-canonical file extension to its canonical spelling.

    The suffix test is case-insensitive, and the first matching entry of
    EXTENSION_NORMALIZE wins (.jpg -> .jpeg, .tif -> .tiff). Only the
    extension is replaced; the rest of the name keeps its original casing.
    Filenames with no matching suffix are returned unchanged.
    """
    lowered = filename.lower()
    for alias, canonical in EXTENSION_NORMALIZE.items():
        if lowered.endswith(alias):
            stem = filename[: -len(alias)]
            return stem + canonical
    return filename


# ======================================================================
# The ONE Regex
# ======================================================================

# Canonical asset filename pattern:
#   {subject}_{type}_{variant}_v{NN}.{ext}
# - subject / variant: start with a lowercase letter, then lowercase
#   alphanumerics, with single hyphens allowed between alphanumeric runs.
# - type: one of the six canonical tokens (keep in sync with VALID_TYPES).
# - version: exactly two digits.
# - ext: png, jpeg or webp (already normalized).
# The pattern ends with \Z rather than $ because $ also matches just before a
# trailing newline, which would let "name_v01.png\n" through .match()/.search().
ASSET_FILENAME_RE = re.compile(
    r"^(?P<subject>[a-z][a-z0-9]*(?:-[a-z0-9]+)*)"
    r"_(?P<type>identity|turn|expr|loc|prop|scene)"
    r"_(?P<variant>[a-z][a-z0-9]*(?:-[a-z0-9]+)*)"
    r"_v(?P<version>[0-9]{2})"
    r"\.(?P<ext>png|jpeg|webp)\Z"
)


# ======================================================================
# AssetId Dataclass
# ======================================================================

@dataclass(frozen=True, slots=True)
class AssetId:
    """Parsed representation of an asset filename."""
    subject: str
    type: str
    variant: str
    version: int
    ext: str

    @property
    def filename(self) -> str:
        """Reconstruct the canonical filename."""
        return f"{self.subject}_{self.type}_{self.variant}_v{self.version:02d}.{self.ext}"

    def bump(self) -> "AssetId":
        """Return a new AssetId with version incremented by 1."""
        return AssetId(
            subject=self.subject,
            type=self.type,
            variant=self.variant,
            version=self.version + 1,
            ext=self.ext,
        )


# ======================================================================
# Parse Function
# ======================================================================

class AssetNameError(ValueError):
    """Signals a filename that violates the asset naming convention.

    Subclasses ValueError, so handlers written for ValueError also catch it.
    """


def parse_asset_filename(filename: str, *, normalize: bool = True) -> AssetId:
    """Parse an asset filename into its constituent segments.

    Args:
        filename: The filename to parse (no directory components).
            Example: "torch_identity_hero_v01.png"
        normalize: If True, normalize extensions (.jpg -> .jpeg) before
            matching. If False, non-canonical extensions will fail to match.

    Returns:
        An AssetId dataclass with subject, type, variant, version, ext.

    Raises:
        AssetNameError: If the filename contains a path separator or does not
            match the asset convention exactly (including any trailing
            whitespace or newline).
    """
    # Reject anything that looks like a path: only bare names are parsed.
    if "/" in filename or "\\" in filename:
        raise AssetNameError(
            f"Expected a bare filename, got a path: {filename!r}"
        )

    # Normalize extension before regex match
    if normalize:
        filename = normalize_extension(filename)

    # fullmatch() rather than match(): a pattern ending in "$" also matches
    # just before a trailing newline, so match() would accept "name.png\n".
    # fullmatch() requires the whole string to be consumed.
    match = ASSET_FILENAME_RE.fullmatch(filename)
    if match is None:
        raise AssetNameError(
            f"Filename does not match asset convention: {filename!r}. "
            f"Expected: {{subject}}_{{type}}_{{variant}}_v{{NN}}.{{ext}} "
            f"where type is one of {sorted(VALID_TYPES)}"
        )

    return AssetId(
        subject=match["subject"],
        type=match["type"],
        variant=match["variant"],
        version=int(match["version"]),  # "01" -> 1
        ext=match["ext"],
    )


def is_valid_asset_filename(filename: str) -> bool:
    """Report whether ``filename`` parses under the asset naming convention.

    Delegates to parse_asset_filename() with its default settings (extension
    normalization enabled) and converts an AssetNameError into False.
    """
    try:
        parse_asset_filename(filename)
    except AssetNameError:
        return False
    return True


# ======================================================================
# Slot Mapping (Pipeline Integration)
# ======================================================================

@dataclass(frozen=True, slots=True)
class SlotSpec:
    """Pipeline slot specification for an asset type."""
    asset_type: str
    slot: str
    weight_min: int
    weight_max: int
    order: int          # Recency bias order (1=first/least attention, 6=last/most)
    auto_eligible: bool  # Can the pipeline auto-resolve this type?


# One SlotSpec per canonical asset type, keyed by the type token.
# Positional fields: asset_type, slot, weight_min, weight_max, order, auto_eligible.
SLOT_MAP: dict[str, SlotSpec] = {
    spec.asset_type: spec
    for spec in (
        SlotSpec("loc", "environment", 1, 2, 1, True),
        SlotSpec("prop", "prop", 3, 4, 2, True),
        SlotSpec("expr", "expression", 6, 7, 3, True),
        SlotSpec("turn", "structure", 4, 5, 4, True),
        SlotSpec("identity", "subject", 8, 10, 5, True),
        SlotSpec("scene", "vibe", 2, 3, 6, False),
    )
}

# The same specs as a list, ascending by recency-bias order
# (pipeline insertion order).
SLOT_ORDER = sorted(SLOT_MAP.values(), key=lambda spec: spec.order)


def default_weight(asset_type: str) -> int:
    """Give the default weight for ``asset_type``.

    Known types get the midpoint of their recommended range, rounded down
    (integer division). Types missing from SLOT_MAP fall back to 5.
    """
    if asset_type not in SLOT_MAP:
        return 5
    spec = SLOT_MAP[asset_type]
    total = spec.weight_min + spec.weight_max
    return total // 2


def validate_ref_weight(asset_type: str, weight: int) -> list[str]:
    """Check ``weight`` against the recommended range for ``asset_type``.

    Returns a list of warning strings: empty when the weight is inside the
    inclusive [weight_min, weight_max] range, and a single "Unknown asset
    type" entry when the type is not in SLOT_MAP.
    """
    spec = SLOT_MAP.get(asset_type)
    if spec is None:
        return [f"Unknown asset type: {asset_type!r}"]

    low, high = spec.weight_min, spec.weight_max
    problems = []
    if weight < low:
        problems.append(
            f"{asset_type} weight {weight} is below recommended minimum {low}"
        )
    if weight > high:
        problems.append(
            f"{asset_type} weight {weight} is above recommended maximum {high}"
        )
    return problems


def validate_ref_auto(asset_type: str, auto: bool) -> list[str]:
    """Check that ``auto`` is only requested for auto-eligible asset types.

    Returns a list of error strings: empty when the combination is allowed,
    one entry when ``auto`` is truthy for a type whose SlotSpec has
    ``auto_eligible`` False, and a single "Unknown asset type" entry when
    the type is not in SLOT_MAP.
    """
    if asset_type not in SLOT_MAP:
        return [f"Unknown asset type: {asset_type!r}"]
    # Nothing to report unless auto was requested on an ineligible type.
    if not auto or SLOT_MAP[asset_type].auto_eligible:
        return []
    return [f"{asset_type} refs must not be auto-resolved (auto must be false)"]


# ======================================================================
# Filename Construction
# ======================================================================

def build_asset_filename(
    subject: str,
    asset_type: str,
    variant: str,
    version: int = 1,
    ext: str = "png",
) -> str:
    """Assemble ``{subject}_{type}_{variant}_v{NN}.{ext}`` from its parts.

    The assembled name is parsed back with extension normalization disabled,
    so every segment (including ``ext``) must already be in canonical form.

    Raises:
        AssetNameError: If ``asset_type`` is not in VALID_TYPES, or if the
            assembled name does not satisfy the naming convention.
    """
    if asset_type not in VALID_TYPES:
        raise AssetNameError(f"Invalid asset type: {asset_type!r}. Must be one of {sorted(VALID_TYPES)}")

    candidate = "{}_{}_{}_v{:02d}.{}".format(subject, asset_type, variant, version, ext)
    # Round-trip through the parser; it raises if any segment is malformed.
    parse_asset_filename(candidate, normalize=False)
    return candidate


def next_version(directory: Path, subject: str, asset_type: str, variant: str) -> int:
    """Return one more than the highest existing version of an asset.

    Every entry of ``directory`` is parsed by name; entries that do not follow
    the naming convention are ignored. Among those whose subject, type and
    variant all match, the largest version is found (any extension counts).
    Returns 1 when nothing matches.
    """
    wanted = (subject, asset_type, variant)
    highest = 0
    for entry in directory.iterdir():
        try:
            parsed = parse_asset_filename(entry.name)
        except AssetNameError:
            # Not an asset file — skip it.
            continue
        if (parsed.subject, parsed.type, parsed.variant) == wanted:
            highest = max(highest, parsed.version)
    return highest + 1


# ======================================================================
# Universal Asset Slugify
# ======================================================================

# Matches each maximal run of characters outside [a-z0-9]. Compiled once at
# import time; the module already imports ``re`` at the top of the file.
_SLUGIFY_RE = re.compile(r"[^a-z0-9]+")


def slugify_asset_id(asset_id: str) -> str:
    """Universal slugify for characters, locations, and props.

    The input is lowercased, ASCII apostrophes are removed, every run of
    characters other than a-z / 0-9 is replaced by a single underscore, and
    leading/trailing underscores are trimmed.

    INT/EXT prefixes (semantically meaningful for lighting) survive without
    special-casing: the "/" and ". " inside them are ordinary separator
    characters, so "INT/EXT. " becomes "int_ext_".

    Examples:
        "SADIE"               -> "sadie"
        "INT. Dusty's Bar"    -> "int_dustys_bar"
        "EXT. City Street"    -> "ext_city_street"
        "INT/EXT. Alley"      -> "int_ext_alley"
        "int_sadie_apartment" -> "int_sadie_apartment"  (passthrough)

    Args:
        asset_id: Free-form name of a character, location, or prop.

    Returns:
        The slug; an empty string if the input has no a-z / 0-9 characters.
    """
    # Drop apostrophes before separator handling so "Dusty's" -> "dustys"
    # (not "dusty_s").
    slug = asset_id.lower().replace("'", "")
    # The "+" in the pattern collapses adjacent separators (including
    # pre-existing underscores) into one underscore.
    return _SLUGIFY_RE.sub("_", slug).strip("_")


# ======================================================================
# v2 Layout Helpers — added 2026-05-26 by project-paths-refactor-v2
# ======================================================================

# Public alias matching the canonical name exported by recoil.core.paths.
# This is the very same frozenset object as VALID_TYPES (not a copy), named
# to match the directory-tree vocabulary. Both names are valid.
VALID_ASSET_KINDS = VALID_TYPES


# Mapping from v1 legacy plural-English directory names to v2 singular
# taxonomy kinds. Used by the migration script (scripts/migrate_v2_layout.py)
# when relocating files from output/refs/{plural}/ into assets/{kind}/.
# Keys are matched exactly (case-sensitive) by kind_for_legacy_type().
LEGACY_PLURAL_TO_KIND: dict[str, str] = {
    "characters": "identity",
    "locations": "loc",
    "props": "prop",
    # Note: turnarounds and expressions historically lived inside
    # characters/{subject}/ as filename suffixes, not separate directories.
    # The migration script sub-classifies them via filename inspection —
    # there's no top-level plural-English directory to translate.
}


def kind_for_legacy_type(legacy: str) -> str:
    """Map a v1 plural-English entity type onto its v2 singular kind.

    Lookup order: a key of LEGACY_PLURAL_TO_KIND is translated; a value that
    is already a v2 kind is returned unchanged.

    Raises:
        ValueError: If ``legacy`` is neither a known plural name nor a v2
            kind. The migration script is documented to catch this and fall
            back to its unmatched-file classifier (see Risk #5).
    """
    translated = LEGACY_PLURAL_TO_KIND.get(legacy)
    if translated is not None:
        return translated
    if legacy not in VALID_ASSET_KINDS:
        raise ValueError(
            f"Unknown legacy type {legacy!r}. Expected one of "
            f"{sorted(LEGACY_PLURAL_TO_KIND)} or a v2 kind in {sorted(VALID_ASSET_KINDS)}."
        )
    # Already a v2 kind — pass through.
    return legacy


# Turnaround-angle markers, matched as plain substrings of the lowercased name.
_LEGACY_TURN_ANGLES = ("_front", "_profile", "_three_quarter", "_back", "_closeup")

# Emotion words that count only when they START a word, i.e. are not preceded
# by a letter or digit. This keeps "anger" from matching inside "stranger" or
# "ranger" while still accepting derived forms such as "fearful" or "joyful".
_LEGACY_EMOTION_WORD_RE = re.compile(
    r"(?<![a-z0-9])"
    r"(?:anger|fear|joy|sadness|disgust|surprise|neutral|contempt|anticipation)"
)

# Emotion markers that must be introduced by an underscore (plain substrings).
_LEGACY_EMOTION_SUFFIXES = ("_calm", "_smile", "_frown", "_grin", "_scowl")


def classify_legacy_filename(filename: str) -> str:
    """Classify a filename from a v1 characters/{subject}/ directory into a v2 kind.

    The v1 layout grouped identity refs, turnarounds, and expression refs all
    into characters/{subject}/. The migration script needs to split them.
    Matching is case-insensitive and rules are applied in this order:

      1. Explicit type tokens are authoritative:
         "_turn_" or "_angle_" -> "turn"; "_expr_" -> "expr";
         "_identity_" -> "identity".
      2. Heuristic, turnaround angle: the name contains "_front", "_profile",
         "_three_quarter", "_back" or "_closeup" -> "turn".
      3. Heuristic, emotion label: a word starting with anger, fear, joy,
         sadness, disgust, surprise, neutral, contempt or anticipation, or one
         of "_calm", "_smile", "_frown", "_grin", "_scowl" -> "expr".
      4. Everything else (including hero shots) -> "identity".

    Args:
        filename: Bare filename to classify.

    Returns:
        One of: "identity", "turn", "expr".
    """
    lower = filename.lower()

    # 1. Explicit type tokens win over every heuristic, so a name such as
    #    "x_expr_front_v01.png" is an expression ref, not a turnaround.
    if "_turn_" in lower or "_angle_" in lower:
        return "turn"
    if "_expr_" in lower:
        return "expr"
    if "_identity_" in lower:
        return "identity"

    # 2. Known turnaround angles.
    # NOTE(review): substring match, so "_back" also hits "_background";
    # left as-is because legacy names like "_front2" must keep matching.
    if any(angle in lower for angle in _LEGACY_TURN_ANGLES):
        return "turn"

    # 3. Known emotion labels.
    if _LEGACY_EMOTION_WORD_RE.search(lower) is not None:
        return "expr"
    if any(marker in lower for marker in _LEGACY_EMOTION_SUFFIXES):
        return "expr"

    # 4. Default: identity
    return "identity"
