"""Memory adapter — reads recoil/engine-memory/{LEARNINGS, ANTI_PATTERNS}.md
and emits MemoryEntry rows.

These markdown files are the engine's persistent learning surface (see
recoil/CLAUDE.md "Engine Memory"). Phase 16 exposes them read-only over
HTTP so the Console v2 EngineMemoryInspector tab can render them. The
markdown source remains the SSOT — editing happens via /analyze-production
and direct git commits, not via the API.

This adapter is READ-ONLY. It never writes to engine-memory/.
"""
from __future__ import annotations

import logging
import os
import re
from collections import OrderedDict
from collections.abc import Callable
from pathlib import Path
from typing import Optional

from recoil.api.schemas.engine import (
    SCHEMA_VERSION,
    MemoryEntry,
)
from recoil.core.paths import RECOIL_ROOT

# Module-level logger; _read_text uses it to warn about unreadable memory files.
logger = logging.getLogger(__name__)


def _memory_dir() -> Path:
    """Resolve the directory that holds the engine-memory markdown files.

    A non-empty ``RECOIL_ENGINE_MEMORY_DIR`` environment variable wins (with
    a leading ``~`` expanded); otherwise the ``engine-memory`` directory
    under ``RECOIL_ROOT`` is returned.
    """
    configured = os.environ.get("RECOIL_ENGINE_MEMORY_DIR")
    if not configured:
        return RECOIL_ROOT / "engine-memory"
    return Path(configured).expanduser()


# Learning entry heading: "### YYYY-MM-DD [scope]". Captures the date and the
# bracketed scope; the bold bullet fields (ID/Learning/Status/…) are read from
# the lines that follow the heading.
_LEARNING_HEADING = re.compile(r"^###\s+(?P<date>\d{4}-\d{2}-\d{2})\s+\[(?P<scope>[^\]]+)\]")
# Anti-pattern entry heading: "### Pattern Name". The negative lookahead
# rejects headings whose text starts with a YYYY-MM-DD date, so dated
# (learning-style) headings never match this pattern.
_ANTI_HEADING = re.compile(r"^###\s+(?P<name>(?!\d{4}-\d{2}-\d{2}).+)")


def _read_text(path: Path) -> str:
    """Return the text of ``path`` decoded as UTF-8, or ``""`` if unreadable.

    This is a best-effort reader for the engine-memory markdown files:

    * Any ``OSError`` (missing file, permissions, …) is logged as a warning
      and yields an empty string.
    * Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
      raising ``UnicodeDecodeError``, so one stray byte in a curated file
      cannot take down the whole listing.
    """
    try:
        # errors="replace": UnicodeDecodeError is a ValueError, not an OSError,
        # so strict decoding would escape the handler below.
        return path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        logger.warning("engine-memory file %s unreadable: %s", path, exc)
        return ""


def _strip_md(text: str) -> str:
    """Reduce inline markdown markers to plain text for the `text` field.

    Unwraps, in this order, ``**bold**``, ``__bold__``, ``*italic*`` and
    backtick code spans (keeping the inner text), then trims surrounding
    whitespace. This is a cheap regex pass, not a markdown parser.
    """
    wrappers = (
        r"\*\*([^*]+)\*\*",
        r"__([^_]+)__",
        r"\*([^*]+)\*",
        r"`([^`]+)`",
    )
    plain = text
    for wrapper in wrappers:
        plain = re.sub(wrapper, r"\1", plain)
    return plain.strip()


def _make_id_synthesizer() -> Callable[[str, Optional[str]], str]:
    """Return a per-parse fallback-id allocator.

    Bug-fix (Debug R7 BUG-5): two LEARNINGS.md entries dated the same day
    without explicit ``**ID:**`` would otherwise both synthesize ``L-{date}``
    — toggling one in the Console UI would mutate the overlay for both.

    The returned ``synth(base, explicit)`` callable behaves as follows:

    * A truthy ``explicit`` ID is returned unchanged (callers remain
      responsible for explicit IDs being unique in the markdown), but it is
      recorded so that a later synthesized ID can never duplicate it.
    * Otherwise ``base`` is returned the first time it is free; afterwards
      ``base-2``, ``base-3``, … are tried and the first candidate that has
      not been issued yet by this allocator is returned.

    Reset semantics: the bookkeeping lives in the closure, so every call to
    this factory starts from a clean slate and collisions from one parse
    never leak into the next.
    """
    issued: set[str] = set()
    # Last numeric suffix handed out per base, so repeated collisions keep
    # counting upwards instead of re-probing from ``-2`` every time.
    last_suffix: dict[str, int] = {}

    def synth(base: str, explicit: Optional[str]) -> str:
        if explicit:
            issued.add(explicit)
            return explicit
        candidate = base
        suffix = last_suffix.get(base, 1)
        while candidate in issued:
            suffix += 1
            candidate = f"{base}-{suffix}"
        last_suffix[base] = suffix
        issued.add(candidate)
        return candidate

    return synth


def _parse_learnings(md: str) -> list[MemoryEntry]:
    """Parse LEARNINGS.md into MemoryEntry rows.

    Each entry starts with a ``### YYYY-MM-DD [scope]`` heading followed by
    bold-prefixed bullet fields (ID, Learning, Evidence, Implication,
    Status, …). An entry's block ends at the next ``### `` entry heading or
    at the next ``## `` section heading, whichever comes first — the same
    boundary rule ``_parse_anti_patterns`` uses — so content belonging to a
    following section is never read as fields of the preceding entry.

    Per emitted row:

    * ``id`` is the explicit ``ID`` field when present, else a synthesized
      ``L-{date}`` made unique within this parse.
    * ``text`` is the Learning, or ``Learning → Implication`` when an
      Implication is present, with inline markdown stripped.
    * ``confidence`` is 0.6 for status ``provisional`` (also the default when
      Status is missing), 0.9 for ``confirmed`` and 0.4 for anything else.
    * ``on`` is False only when the status is ``stale``.
    """
    out: list[MemoryEntry] = []
    synth_id = _make_id_synthesizer()
    lines = md.splitlines()
    i = 0
    while i < len(lines):
        m = _LEARNING_HEADING.match(lines[i])
        if not m:
            i += 1
            continue
        scope = m.group("scope").strip()
        date = m.group("date")
        # Walk the block until the next entry (###) or section (##) heading.
        j = i + 1
        while j < len(lines) and not lines[j].startswith(("### ", "## ")):
            j += 1
        fields = _extract_bold_fields(lines[i + 1 : j])
        entry_id = synth_id(f"L-{date}", fields.get("id"))
        learning = fields.get("learning") or ""
        implication = fields.get("implication") or ""
        body = f"{learning} → {implication}" if implication else learning
        status = (fields.get("status") or "provisional").lower()
        if status == "provisional":
            confidence = 0.6
        elif status == "confirmed":
            confidence = 0.9
        else:
            confidence = 0.4
        out.append(
            MemoryEntry(
                schema_version=SCHEMA_VERSION,
                id=str(entry_id),
                kind="learning",
                scope=scope,
                text=_strip_md(body),
                confidence=confidence,
                hits=1,
                on=status != "stale",
            )
        )
        # Resume at the heading that ended this block; it may itself be the
        # next learning entry.
        i = j
    return out


def _parse_anti_patterns(md: str) -> list[MemoryEntry]:
    """Turn the text of ANTI_PATTERNS.md into ``MemoryEntry`` rows.

    Layout the parser relies on:

    * A ``## `` heading containing ``Anti-Pattern`` names a category; its
      text (lower-cased, spaces replaced by underscores) becomes the
      ``scope`` of the entries below it. Entries seen before any such
      heading get the scope ``"anti-pattern"``.
    * A line that is exactly ``## Retired`` (ignoring surrounding
      whitespace) ends parsing; nothing after it is emitted.
    * Each entry is a ``### Pattern Name`` heading followed by bold bullet
      fields (ID, Status, What it is, Why it fails, Instead, do). The block
      runs until the next ``### `` or ``## `` line. Blocks without an ``ID``
      field are skipped.

    Per emitted row, ``text`` is ``"{name} — {what} | {why} | Instead: {…}"``
    built from the non-empty parts; ``confidence`` is 0.6 for ``provisional``
    (the default status), 0.9 for ``active`` and 0.3 otherwise; ``on`` is
    False only for status ``stale``.

    Debug R7 BUG-5: the explicit ID is handed to the per-parse synthesizer
    as the *base*, so a duplicated ``A001`` is emitted as ``A001`` then
    ``A001-2`` rather than as two rows sharing one id.
    """
    entries: list[MemoryEntry] = []
    allocate_id = _make_id_synthesizer()
    rows = md.splitlines()
    total = len(rows)
    category = "anti-pattern"
    confidence_by_status = {"provisional": 0.6, "active": 0.9}
    pos = 0
    while pos < total:
        row = rows[pos]
        # h2 categories: "## Script Anti-Patterns" etc.
        if row.startswith("## ") and "Anti-Pattern" in row:
            category = row[3:].strip().lower().replace(" ", "_")
            pos += 1
            continue
        # Everything from the "## Retired" section onwards is ignored.
        if row.strip() == "## Retired":
            break
        heading = _ANTI_HEADING.match(row)
        if heading is None:
            pos += 1
            continue
        # Collect the entry body: every line up to the next h3/h2 heading.
        end = pos + 1
        while end < total and not rows[end].startswith(("### ", "## ")):
            end += 1
        fields = _extract_bold_fields(rows[pos + 1 : end])
        pos = end
        if not fields.get("id"):
            # A "###" heading without an ID is not an anti-pattern entry.
            continue
        title = heading.group("name").strip()
        parts = [fields.get("what_it_is") or "", fields.get("why_it_fails") or ""]
        alternative = fields.get("instead_do") or fields.get("instead,_do") or ""
        if alternative:
            parts.append(f"Instead: {alternative}")
        summary = " | ".join(part for part in parts if part)
        status = (fields.get("status") or "provisional").lower()
        entries.append(
            MemoryEntry(
                schema_version=SCHEMA_VERSION,
                # Explicit ID used as the synthesizer base, never as `explicit`,
                # so repeats get a numeric suffix.
                id=allocate_id(str(fields["id"]), None),
                kind="anti-pattern",
                scope=category,
                text=_strip_md(f"{title} — {summary}"),
                confidence=confidence_by_status.get(status, 0.3),
                hits=1,
                on=status != "stale",
            )
        )
    return entries


# One bullet field row: "- **Field name:** value". Group 1 is the field name
# (no ":" or "*" allowed inside it), group 2 the rest of the line.
_BOLD_FIELD_RE = re.compile(r"^\s*-\s*\*\*([^:*]+):\*\*\s*(.*)$")


def _extract_bold_fields(block: list[str]) -> dict[str, str]:
    """Parse a contiguous bullet-list of `- **Field:** value` rows.

    Keys are the field names lower-cased with spaces replaced by underscores
    (``What it is`` → ``what_it_is``); a repeated field keeps the last value.

    A non-bullet line directly following a field is treated as a wrapped
    continuation and appended to that field, separated by a single space.
    A blank line or a markdown heading closes the current field, so trailing
    prose or a section heading after the list is not appended to the last
    field (which would, for example, corrupt ``status``). Bullets that are
    not bold fields are ignored.

    Args:
        block: The lines below an entry heading.

    Returns:
        Mapping of normalized field name to its stripped text value.
    """
    fields: dict[str, str] = {}
    current_key: Optional[str] = None
    for raw in block:
        m = _BOLD_FIELD_RE.match(raw)
        if m:
            key = m.group(1).strip().lower().replace(" ", "_")
            fields[key] = m.group(2).strip()
            current_key = key
            continue
        stripped = raw.strip()
        if not stripped or re.match(r"#{1,6}\s", raw):
            # End of the contiguous list item: stop accepting continuations.
            current_key = None
            continue
        # Continuation line for prior field
        if current_key and not stripped.startswith("-"):
            fields[current_key] = (fields[current_key] + " " + stripped).strip()
    return fields


# ── Phase 19 — runtime overlay for toggle ─────────────────────────────────
#
# engine-memory/*.md is JT-curated. We do NOT mutate the markdown via the
# HTTP API. Instead we keep a process-local overlay map of {entry_id → on}
# applied on top of the parsed markdown view. Restarts reset the overlay.
# When CP-N+ ships a real persistence layer this overlay collapses into it.
#
# Bug-fix (Debug R2): bounded, recency-ordered map. A misbehaving caller
# firing toggles at random ids cannot grow this dict without bound — once the
# cap is reached, inserting a new id first evicts the least-recently-set
# entry via OrderedDict.popitem(last=False).
# Maximum number of ids the overlay remembers at once.
_OVERLAY_MAX_ENTRIES = 1000
# entry_id -> on, ordered from least- to most-recently-set.
_OVERLAY: "OrderedDict[str, bool]" = OrderedDict()


def _overlay_set(entry_id: str, on: bool) -> bool:
    """Record ``on`` for ``entry_id`` in the overlay and return the stored bool.

    A known id is moved to the most-recently-set end before being
    overwritten, so entries that keep getting set survive trimming. An
    unknown id arriving while the overlay is at (or above) its cap first
    evicts the entry at the least-recently-set end. ``on`` is coerced with
    ``bool()`` before it is stored.
    """
    if entry_id not in _OVERLAY:
        if len(_OVERLAY) >= _OVERLAY_MAX_ENTRIES:
            # Make room: drop the entry that was set longest ago.
            _OVERLAY.popitem(last=False)
    else:
        _OVERLAY.move_to_end(entry_id)
    _OVERLAY[entry_id] = bool(on)
    return _OVERLAY[entry_id]


def _reset_overlay_for_tests() -> None:  # pragma: no cover — test helper
    """Remove every entry from the process-local overlay (test helper)."""
    _OVERLAY.clear()


def list_memory() -> list[MemoryEntry]:
    """Return every learning and anti-pattern entry with the overlay applied.

    ``LEARNINGS.md`` is parsed first, then ``ANTI_PATTERNS.md``, both looked
    up in the engine-memory directory; a file that does not exist
    contributes nothing. Entries whose id is present in the runtime overlay
    are returned as copies carrying the overlay's ``on`` value; all other
    entries are returned exactly as parsed.
    """
    base = _memory_dir()
    entries: list[MemoryEntry] = []
    for filename, parse in (
        ("LEARNINGS.md", _parse_learnings),
        ("ANTI_PATTERNS.md", _parse_anti_patterns),
    ):
        source = base / filename
        if source.exists():
            entries.extend(parse(_read_text(source)))
    if not _OVERLAY:
        return entries
    # model_copy builds a new instance with the overridden `on` instead of
    # mutating the parsed entry.
    return [
        entry.model_copy(update={"on": _OVERLAY[entry.id]}) if entry.id in _OVERLAY else entry
        for entry in entries
    ]


def toggle_entry(entry_id: str) -> dict:
    """Invert the on/off bit of ``entry_id`` in the runtime overlay.

    The starting value is the overlay's current value when the id is
    already there; otherwise the ``on`` of the first parsed entry with that
    id; otherwise ``True`` — so an id that is not known anywhere still
    registers a flip (to off). The id is deliberately not validated against
    the parsed list, matching the Phase 17 stub semantics (any id accepted)
    so the UI keeps round-tripping. (HTTP-level shape validation lives in
    mutation_routes.py per Debug R2.)

    Returns:
        ``{"entry_id": entry_id, "on": <new value>}``.
    """
    current = _OVERLAY.get(entry_id)
    if current is None:
        # Not overridden yet: start from the markdown's notion of `on`.
        markdown_on = next(
            (entry.on for entry in list_memory() if entry.id == entry_id),
            None,
        )
        current = True if markdown_on is None else markdown_on
    return {"entry_id": entry_id, "on": _overlay_set(entry_id, not current)}


def set_entry(entry_id: str, on: bool) -> dict:
    """Store ``on`` for ``entry_id`` in the runtime overlay (idempotent).

    Debug R1 fix — unlike ``toggle_entry``, repeating this call with the
    same arguments leaves the same value stored, so clients that need
    safe-to-retry semantics use it. The HTTP route accepts an optional
    ``{value: bool}`` body; when present it routes here.

    Returns:
        ``{"entry_id": entry_id, "on": <stored value>}``.
    """
    return {"entry_id": entry_id, "on": _overlay_set(entry_id, on)}


# Public API: the aggregated read view plus the two overlay mutators.
__all__ = ["list_memory", "toggle_entry", "set_entry"]
