"""Memory adapter — reads recoil/engine-memory/{LEARNINGS, ANTI_PATTERNS}.md and emits MemoryEntry rows. These markdown files are the engine's persistent learning surface (see recoil/CLAUDE.md "Engine Memory"). Phase 16 exposes them read-only over HTTP so the Console v2 EngineMemoryInspector tab can render them. The markdown source remains the SSOT — editing happens via /analyze-production and direct git commits, not via the API. This adapter is READ-ONLY. It never writes to engine-memory/. """ from __future__ import annotations import logging import os import re from collections import OrderedDict from pathlib import Path from typing import Optional from recoil.api.schemas.engine import ( SCHEMA_VERSION, MemoryEntry, ) from recoil.core.paths import RECOIL_ROOT logger = logging.getLogger(__name__) def _memory_dir() -> Path: override = os.environ.get("RECOIL_ENGINE_MEMORY_DIR") if override: return Path(override).expanduser() return RECOIL_ROOT / "engine-memory" # Heading line for a learning: "### YYYY-MM-DD [scope]" — the first one is # the section title, then ID/Learning/Status fields. _LEARNING_HEADING = re.compile(r"^###\s+(?P\d{4}-\d{2}-\d{2})\s+\[(?P[^\]]+)\]") # Anti-pattern heading: "### Pattern Name" (first line of an anti-pattern block). _ANTI_HEADING = re.compile(r"^###\s+(?P(?!\d{4}-\d{2}-\d{2}).+)") def _read_text(path: Path) -> str: try: return path.read_text(encoding="utf-8") except OSError as exc: logger.warning("engine-memory file %s unreadable: %s", path, exc) return "" def _strip_md(text: str) -> str: """Cheap markdown → plain text for the `text` field.""" text = re.sub(r"\*\*([^*]+)\*\*", r"\1", text) text = re.sub(r"__([^_]+)__", r"\1", text) text = re.sub(r"\*([^*]+)\*", r"\1", text) text = re.sub(r"`([^`]+)`", r"\1", text) return text.strip() def _make_id_synthesizer() -> "callable[[str, Optional[str]], str]": """Return a per-parse fallback-id allocator. Bug-fix (Debug R7 BUG-5): two LEARNINGS.md entries dated the same day without explicit ``**ID:**`` would otherwise both synthesize ``L-{date}`` — toggling one in the Console UI would mutate the overlay for both. The closure tracks already-issued synthetic bases per parse call; a collision on the base appends ``-2``, ``-3``, … so every emitted ID is unique within the file. Explicit IDs are returned unchanged (callers are responsible for uniqueness in markdown). Reset semantics: a fresh closure is built per ``_parse_*`` call (and therefore per ``list_memory()`` invocation), so collisions from one parse never leak into the next. """ seen: dict[str, int] = {} def synth(base: str, explicit: Optional[str]) -> str: if explicit: return explicit if base not in seen: seen[base] = 1 return base seen[base] += 1 return f"{base}-{seen[base]}" return synth def _parse_learnings(md: str) -> list[MemoryEntry]: """Parse LEARNINGS.md into MemoryEntry rows. Each block starts with `### YYYY-MM-DD [scope]` then a sequence of bold-prefixed fields: ID, Learning, Evidence, Implication, Status, etc. The MemoryEntry.text is the Learning + Implication concatenated. """ out: list[MemoryEntry] = [] synth_id = _make_id_synthesizer() lines = md.splitlines() i = 0 while i < len(lines): m = _LEARNING_HEADING.match(lines[i]) if not m: i += 1 continue scope = m.group("scope").strip() date = m.group("date") # Walk the block until next ### j = i + 1 block: list[str] = [] while j < len(lines) and not lines[j].startswith("### "): block.append(lines[j]) j += 1 fields = _extract_bold_fields(block) entry_id = synth_id(f"L-{date}", fields.get("id")) learning = fields.get("learning") or "" implication = fields.get("implication") or "" body = learning if not implication else f"{learning} → {implication}" status = (fields.get("status") or "provisional").lower() confidence = 0.6 if status == "provisional" else 0.9 if status == "confirmed" else 0.4 on = status != "stale" out.append( MemoryEntry( schema_version=SCHEMA_VERSION, id=str(entry_id), kind="learning", scope=scope, text=_strip_md(body), confidence=confidence, hits=1, on=on, ) ) i = j return out def _parse_anti_patterns(md: str) -> list[MemoryEntry]: """Parse ANTI_PATTERNS.md into MemoryEntry rows. The file is grouped by category (Script/Visual/Production/Development Anti-Patterns). Each entry starts with `### Pattern Name` then bold fields: ID, Status, What it is, Why it fails, Instead, do. Bug-fix (Debug R7 BUG-5): wires the per-parse ID synthesizer used by ``_parse_learnings`` for symmetry. Today every anti-pattern entry carries an explicit ``ID:`` and the skip-when-missing branch keeps the parser tight, so the synthesizer never has to assign a fallback. The seen-id bookkeeping still gives explicit IDs a uniqueness guarantee against curator typo: a duplicate explicit ``A001`` becomes ``A001-2`` etc., matching the LEARNINGS.md collision shape. """ out: list[MemoryEntry] = [] synth_id = _make_id_synthesizer() lines = md.splitlines() current_category = "anti-pattern" i = 0 while i < len(lines): line = lines[i] # Track h2 categories: "## Script Anti-Patterns" etc. if line.startswith("## ") and "Anti-Pattern" in line: current_category = line[3:].strip().lower().replace(" ", "_") i += 1 continue # Heuristic — skip "## Retired" entries if line.strip() == "## Retired": break m = _ANTI_HEADING.match(line) if not m: i += 1 continue name = m.group("name").strip() j = i + 1 block: list[str] = [] while j < len(lines) and not lines[j].startswith("### ") and not lines[j].startswith("## "): block.append(lines[j]) j += 1 fields = _extract_bold_fields(block) if not fields.get("id"): # Not really an anti-pattern entry — skip i = j continue what = fields.get("what_it_is") or "" why = fields.get("why_it_fails") or "" instead = fields.get("instead_do") or fields.get("instead,_do") or "" body = " | ".join([s for s in (what, why, f"Instead: {instead}" if instead else "") if s]) status = (fields.get("status") or "provisional").lower() confidence = 0.6 if status == "provisional" else 0.9 if status == "active" else 0.3 on = status != "stale" # Synthesizer dedups via the synthetic base. We treat the explicit ID # as the base so a curator typo (two ``A001`` entries) emits # ``A001`` + ``A001-2`` rather than two indistinguishable rows. raw_id = str(fields["id"]) entry_id = synth_id(raw_id, None) out.append( MemoryEntry( schema_version=SCHEMA_VERSION, id=entry_id, kind="anti-pattern", scope=current_category, text=_strip_md(f"{name} — {body}"), confidence=confidence, hits=1, on=on, ) ) i = j return out _BOLD_FIELD_RE = re.compile(r"^\s*-\s*\*\*([^:*]+):\*\*\s*(.*)$") def _extract_bold_fields(block: list[str]) -> dict[str, str]: """Parse a contiguous bullet-list of `- **Field:** value` rows.""" fields: dict[str, str] = {} current_key: Optional[str] = None for raw in block: m = _BOLD_FIELD_RE.match(raw) if m: key = m.group(1).strip().lower().replace(" ", "_") val = m.group(2).strip() fields[key] = val current_key = key continue # Continuation line for prior field if current_key and raw.strip() and not raw.lstrip().startswith("-"): fields[current_key] = (fields[current_key] + " " + raw.strip()).strip() return fields # ── Phase 19 — runtime overlay for toggle ───────────────────────────────── # # engine-memory/*.md is JT-curated. We do NOT mutate the markdown via the # HTTP API. Instead we keep a process-local overlay map of {entry_id → on} # applied on top of the parsed markdown view. Restarts reset the overlay. # When CP-N+ ships a real persistence layer this overlay collapses into it. # # Bug-fix (Debug R2): bounded LRU. A misbehaving caller firing toggles at # random ids cannot grow this dict unbounded — when the cap is reached we # drop the oldest insertion via OrderedDict.popitem(last=False). _OVERLAY_MAX_ENTRIES = 1000 _OVERLAY: "OrderedDict[str, bool]" = OrderedDict() def _overlay_set(entry_id: str, on: bool) -> bool: """Insert/update ``entry_id`` in the overlay; trim to cap if needed. Returns the value that was stored (== ``on``). Updating an existing key refreshes its position to "most-recently-set" so heavily-used entries survive trims; new inserts at cap drop the oldest first. """ if entry_id in _OVERLAY: # Move to end (most-recently-set) before re-storing. _OVERLAY.move_to_end(entry_id, last=True) elif len(_OVERLAY) >= _OVERLAY_MAX_ENTRIES: _OVERLAY.popitem(last=False) _OVERLAY[entry_id] = bool(on) return _OVERLAY[entry_id] def _reset_overlay_for_tests() -> None: # pragma: no cover — test helper _OVERLAY.clear() def list_memory() -> list[MemoryEntry]: """Aggregate LEARNINGS + ANTI_PATTERNS into a flat list, applying overlay.""" d = _memory_dir() out: list[MemoryEntry] = [] learnings = d / "LEARNINGS.md" if learnings.exists(): out.extend(_parse_learnings(_read_text(learnings))) anti = d / "ANTI_PATTERNS.md" if anti.exists(): out.extend(_parse_anti_patterns(_read_text(anti))) if not _OVERLAY: return out # Apply overlay — recreate frozen pydantic instances with updated `on`. patched: list[MemoryEntry] = [] for e in out: if e.id in _OVERLAY: patched.append(e.model_copy(update={"on": _OVERLAY[e.id]})) else: patched.append(e) return patched def toggle_entry(entry_id: str) -> dict: """Flip the on/off bit for ``entry_id`` in the runtime overlay. If the entry was never seen we still register a flip (defaults to "on" → flips to off). The HTTP route does NOT need to validate the id against the parsed list — this matches the Phase 17 stub semantics (any id accepted) so the UI keeps round-tripping. (HTTP-level shape validation lives in mutation_routes.py per Debug R2.) """ cur = _OVERLAY.get(entry_id) if cur is None: # Look up the markdown's notion of `on` for this id; flip from there. seen_on: Optional[bool] = None for e in list_memory(): if e.id == entry_id: seen_on = e.on break cur = seen_on if seen_on is not None else True new_value = _overlay_set(entry_id, not cur) return {"entry_id": entry_id, "on": new_value} def set_entry(entry_id: str, on: bool) -> dict: """Idempotent set of the on/off bit for ``entry_id``. Debug R1 fix — clients that want safe-to-retry semantics call this instead of ``toggle_entry``. The HTTP route accepts an optional ``{value: bool}`` body; when present it routes here. """ new_value = _overlay_set(entry_id, on) return {"entry_id": entry_id, "on": new_value} __all__ = ["list_memory", "toggle_entry", "set_entry"]