"""Shared ID validation for path-segment IDs (Debug R1 bug-fix).

These regexes guard against path traversal via unsanitized IDs that would
otherwise be interpolated into filesystem paths (e.g. ``shots/{beat_id}.json``).
A request like ``GET /api/projects/../../etc/passwd`` would otherwise resolve
to ``projects/../../etc/passwd/state/visual/global_bible.json`` — escaping
the projects root.

Contract:
  • ``PROJECT_ID_RE``    — agrees with @recoil/contracts ProjectId regex.
  • ``HIERARCHY_ID_RE``  — superset of beat / take / episode / scene IDs.

Adapters call ``validate_project_id`` / ``validate_hierarchy_id`` at the
public-API entry point; routes catch ``ValueError`` and surface 400.
"""
from __future__ import annotations

import re

# Lowercase letter or digit lead; lowercase, digits, ``-``, ``_`` thereafter.
# 1–64 chars total. Matches the @recoil/contracts ProjectId.
#
# The end anchor is ``\Z`` rather than ``$``: in Python ``$`` also matches
# just before a trailing newline, so ``.match("demo\n")`` would succeed and
# let a newline through into a path segment. ``\Z`` matches only at the very
# end of the string.
PROJECT_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}\Z")

# Beat / take / episode / scene / memory-entry IDs are mixed case + digits +
# underscore + dash. 1–64 chars total.
# Examples: "EP001_SH02", "EP001_SH05A", "T_alpha", "L-2026-03-25".
#
# Bug-fix (Debug R3): dash widened in. The memory adapter synthesizes
# fallback IDs of shape ``L-YYYY-MM-DD`` (recoil/api/adapters/memory.py:96)
# when LEARNINGS.md entries lack an explicit ID — and most do. Without
# the dash, /api/memory/{entry_id}/toggle returns 400 for every such entry.
# Path-traversal protection is unaffected: ``-`` is filesystem-safe, the
# routing layer still blocks ``/`` (and decoded ``%2f``), and ``.`` is not
# in the char class at all, so ``..`` can never match.
#
# Anchored with ``\Z`` (not ``$``) for the same trailing-newline reason as
# PROJECT_ID_RE above.
HIERARCHY_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}\Z")


def validate_project_id(project_id: str) -> str:
    """Return ``project_id`` unchanged if it is a valid project ID.

    The entire string must match ``PROJECT_ID_RE``. ``fullmatch`` is used so
    that every character is consumed by the pattern; an ID with a trailing
    newline (which ``match`` combined with a ``$`` anchor would accept) is
    rejected.

    Raises:
        ValueError: if ``project_id`` is not a ``str`` or does not match.
    """
    # isinstance first: fullmatch would raise TypeError on non-str input,
    # and callers only catch ValueError.
    is_valid = isinstance(project_id, str) and PROJECT_ID_RE.fullmatch(project_id)
    if not is_valid:
        raise ValueError(f"invalid project_id: {project_id!r}")
    return project_id


def validate_hierarchy_id(name: str, value: str) -> str:
    """Return ``value`` unchanged if it is a valid hierarchy ID.

    ``name`` is for the error message — pass ``"beat_id"`` / ``"take_id"`` /
    ``"episode_id"`` / ``"scene_id"`` so the 400 surfaces which field failed.

    The entire string must match ``HIERARCHY_ID_RE``. ``fullmatch`` is used
    so that every character is consumed by the pattern; an ID with a trailing
    newline (which ``match`` combined with a ``$`` anchor would accept) is
    rejected.

    Raises:
        ValueError: if ``value`` is not a ``str`` or does not match.
    """
    # isinstance first: fullmatch would raise TypeError on non-str input,
    # and callers only catch ValueError.
    is_valid = isinstance(value, str) and HIERARCHY_ID_RE.fullmatch(value)
    if not is_valid:
        raise ValueError(f"invalid {name}: {value!r}")
    return value


# Public API of this module: the two compiled patterns and their validators.
__all__ = [
    "PROJECT_ID_RE",
    "HIERARCHY_ID_RE",
    "validate_project_id",
    "validate_hierarchy_id",
]
