"""Recents adapter — mtime-sorted media feed per project.

Ported from recoil/workspace/server.py:2341-2425 onto the engine API.
Adds explicit beat_id / episode_id / take_id derivation per row so the
frontend never parses paths.

Layout coverage (all 5 current projects):
  - Nested (afterimage, tartarus, ronin-drm):
      output/{previs,frames,video}/ep_NNN/shot_NNN_takeM.{png,mp4,...}
      episode_id = "EPNNN", beat_id = "EPNNN_SHNN", take_id from sidecar
      or filename.
  - Flat (driver-beware): output/<shot-folder>/take-N.{ext}
      episode_id = None, beat_id = <shot-folder>, take_id from sidecar
      or filename.

When derivation cannot resolve an id, the field is null. The frontend
treats null beat_id as a disabled / greyed-out row. If any row in a walk
ends up with a null beat_id, one `recent_id_derivation_failed` fallback
event (row count plus up to five sample paths) is emitted for telemetry.
"""

from __future__ import annotations

import json
import logging
import re
import time as _time
from typing import Optional

from recoil.api.adapters._ids import validate_project_id
from recoil.api.fallback_bridge import emit_fallback
from recoil.core.paths import projects_root

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Mirror the workspace server's extension set.
# Entries are lowercase with a leading dot because they are compared against
# `path.suffix.lower()`. Files whose suffix is in VIDEO_EXTENSIONS are reported
# as type "video"; every other media file is reported as "image".
MEDIA_EXTENSIONS = {".png", ".jpg", ".jpeg", ".webp", ".mp4", ".mov", ".webm"}
VIDEO_EXTENSIONS = {".mp4", ".mov", ".webm"}
# A file is skipped when any component of its path relative to output/ equals
# one of these names. `_meta` is where the sidecar JSONs are read from.
SKIP_DIRS = ("_archive", "_meta", "boundary_frames")

# In-process cache mirroring the workspace endpoint (5s TTL).
# Keyed by project_id only — full sorted list cached, paginated in-memory so
# all (limit, offset) combos share one walk per TTL window.
# Each value is (time.monotonic() at walk time, entries sorted newest-first).
_RECENT_CACHE: dict[str, tuple[float, list[dict]]] = {}
_RECENT_CACHE_TTL = 5.0

# Derivation patterns — kept narrow to avoid false-positive matches.
# _EP_DIR_RE is applied with fullmatch() to whole path segments, while the
# shot/take patterns are applied with search() to the file name only.
_EP_DIR_RE = re.compile(r"ep[_-]?(\d+)", re.IGNORECASE)
_SHOT_FILE_RE = re.compile(r"shot[_-]?(\d+)", re.IGNORECASE)
_TAKE_FILE_RE = re.compile(r"take[_-]?(\d+)", re.IGNORECASE)


def _build_metadata_index(project_id: str) -> dict[str, dict]:
    """Read sidecar JSONs from output/_meta/*.json (if present).

    A sidecar is indexed only when it parses to a JSON object that carries a
    string ``target_path`` (preferred) or ``path`` value; that value becomes
    the key. Unreadable, undecodable, or malformed sidecars are skipped
    silently so the recents feed keeps working without the overlay.

    Args:
        project_id: Name of the project folder under ``projects_root()``.

    Returns:
        A ``{rel_path: metadata}`` map keyed by the path recorded in each
        sidecar. Empty when the ``_meta`` directory does not exist.
    """
    meta_dir = projects_root() / project_id / "output" / "_meta"
    if not meta_dir.is_dir():
        return {}

    index: dict[str, dict] = {}
    # Sorted so that, if two sidecars name the same target, the one that wins
    # is deterministic instead of depending on directory enumeration order.
    for sidecar in sorted(meta_dir.glob("*.json")):
        try:
            data = json.loads(sidecar.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised by read_text() on non-UTF-8 bytes.
            continue
        if not isinstance(data, dict):
            continue
        target = data.get("target_path") or data.get("path")
        if isinstance(target, str):
            index[target] = data
    return index


def _nonblank_str(value: object) -> Optional[str]:
    """Return *value* stripped when it is a non-blank string, else None."""
    if isinstance(value, str):
        stripped = value.strip()
        if stripped:
            return stripped
    return None


def _derive_ids(
    rel_path: str, meta: Optional[dict]
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Return (episode_id, beat_id, take_id) for a recent entry.

    Order of resolution per field:
      1. Sidecar metadata explicit (`episode_id`, `beat_id`/`shot_id`, `take_id`).
         Values that are not strings, or are blank after stripping, are
         ignored; a blank or non-string `beat_id` falls through to `shot_id`.
      2. Path-pattern parse:
         - episode_id from /ep_NNN/ segment in the relative path
         - beat_id from {episode}_SH{shot_num} when both are derivable, else
           the nearest parent folder that is neither a generic container
           (`output`, `frames`, ...) nor an episode folder (flat layouts)
         - take_id from the filename's take_NNN segment, prefixed with the
           resolved beat_id (so it stays None when beat_id is None)
      3. None on failure (frontend disables the row, fallback event fires)

    Args:
        rel_path: Media path relative to the project directory. Both ``/``
            and ``\\`` are accepted as separators.
        meta: Sidecar metadata for the file, or None when there is none.

    Returns:
        ``(episode_id, beat_id, take_id)``; each element is a string or None.
    """
    episode_id: Optional[str] = None
    beat_id: Optional[str] = None
    take_id: Optional[str] = None

    if meta:
        episode_id = _nonblank_str(meta.get("episode_id"))
        # Validate each candidate before falling back, so an unusable
        # `beat_id` does not hide a usable `shot_id`.
        beat_id = _nonblank_str(meta.get("beat_id")) or _nonblank_str(
            meta.get("shot_id")
        )
        take_id = _nonblank_str(meta.get("take_id"))

    # Accept OS-native Windows separators as well as forward slashes.
    # str.split always yields at least one element, so parts[-1] is safe.
    parts = rel_path.replace("\\", "/").split("/")
    name = parts[-1]

    if episode_id is None:
        for seg in parts:
            m = _EP_DIR_RE.fullmatch(seg)
            if m:
                episode_id = f"EP{int(m.group(1)):03d}"
                break

    if beat_id is None:
        m = _SHOT_FILE_RE.search(name)
        if m and episode_id:
            beat_id = f"{episode_id}_SH{int(m.group(1)):02d}"
        else:
            # Flat layout fallback: nearest usable parent folder name. Skip
            # generic containers and episode folders, which identify no shot.
            generic = {"output", "frames", "previs", "video", "stills"}
            for seg in reversed(parts[:-1]):
                if seg.lower() not in generic and not _EP_DIR_RE.fullmatch(seg):
                    beat_id = seg
                    break

    if take_id is None:
        m = _TAKE_FILE_RE.search(name)
        if m and beat_id:
            take_id = f"{beat_id}_T{int(m.group(1)):03d}"

    return episode_id, beat_id, take_id


def _recent_page(entries: list[dict], limit: int, offset: int) -> dict:
    """Build the response payload for one (limit, offset) window of *entries*."""
    # Clamp to zero: a negative value would otherwise be interpreted by the
    # slice as a from-the-end index and return an unrelated window.
    start = max(offset, 0)
    stop = start + max(limit, 0)
    return {"schemaVersion": 2, "files": entries[start:stop], "total": len(entries)}


def list_recent(project_id: str, limit: int = 50, offset: int = 0) -> dict:
    """Return the recents response payload for a project.

    Walks ``<project>/output`` for media files, overlays sidecar metadata,
    derives episode/beat/take ids per row, and sorts newest-first by mtime.
    The full sorted list is cached per project for ``_RECENT_CACHE_TTL``
    seconds; pagination is applied to the cached list.

    Shape:
      {
        "schemaVersion": 2,
        "files": [RecentEntry, ...],
        "total": int,
      }

    Args:
        project_id: Project folder name; validated before any filesystem use.
        limit: Maximum number of rows to return. Negative values are treated
            as 0.
        offset: Number of newest rows to skip. Negative values are treated
            as 0.

    Returns:
        The payload described above. ``total`` counts all rows, not just the
        returned page. An empty payload is returned when the project has no
        ``output`` directory.

    Raises ValueError on malformed project_id (route maps to 400).
    """
    validate_project_id(project_id)
    project_dir = projects_root() / project_id
    output_dir = project_dir / "output"
    if not output_dir.is_dir():
        return _recent_page([], limit, offset)

    now = _time.monotonic()
    cached_entry = _RECENT_CACHE.get(project_id)
    if cached_entry is not None:
        cached_at, cached_entries = cached_entry
        if now - cached_at < _RECENT_CACHE_TTL:
            return _recent_page(cached_entries, limit, offset)

    meta_index = _build_metadata_index(project_id)

    entries: list[dict] = []
    failed_derivations: list[str] = []
    for path in output_dir.rglob("*"):
        # Cheap string filters first, so is_file() only touches the
        # filesystem for plausible candidates.
        ext = path.suffix.lower()
        if ext not in MEDIA_EXTENSIONS:
            continue
        # rel_parts includes the file name, so this also drops dotfiles.
        rel_parts = path.relative_to(output_dir).parts
        if any(
            p in SKIP_DIRS or p.startswith(".") or "backup" in p.lower()
            for p in rel_parts
        ):
            continue
        if not path.is_file():
            continue

        try:
            mtime = path.stat().st_mtime
        except OSError:
            # File vanished or became unreadable mid-walk; leave it out.
            continue

        # Use forward slashes on every platform: rel_path is embedded in a
        # URL and split on "/" during id derivation.
        rel = path.relative_to(project_dir)
        rel_path = rel.as_posix()
        meta = meta_index.get(rel_path)
        if meta is None:
            # Sidecars may have recorded the OS-native form of the path.
            meta = meta_index.get(str(rel))

        episode_id, beat_id, take_id = _derive_ids(rel_path, meta)
        entry: dict = {
            "schemaVersion": 2,
            "name": path.name,
            "path": rel_path,
            "media_url": f"/api/media/{project_id}/{rel_path}",
            "type": "video" if ext in VIDEO_EXTENSIONS else "image",
            "mtime": mtime,
            "status": "untracked",
            "status_color": "gray",
            "model": None,
            "cost": None,
            "beat_id": beat_id,
            "episode_id": episode_id,
            "take_id": take_id,
        }
        if meta:
            entry["status"] = meta.get("status", entry["status"])
            entry["status_color"] = meta.get("status_color", entry["status_color"])
            entry["model"] = meta.get("model")
            entry["cost"] = meta.get("cost")

        if beat_id is None:
            failed_derivations.append(rel_path)

        entries.append(entry)

    if failed_derivations:
        # One event per walk, carrying a count and a small sample of paths.
        emit_fallback(
            "recent_id_derivation_failed",
            scope="api/adapters/recent",
            payload={"project_id": project_id, "count": len(failed_derivations), "paths": failed_derivations[:5]},
        )

    entries.sort(key=lambda e: e["mtime"], reverse=True)
    _RECENT_CACHE[project_id] = (now, entries)
    return _recent_page(entries, limit, offset)


# Only `list_recent` is exported; the underscore-prefixed helpers and caches
# above are module-private.
__all__ = ["list_recent"]
