#!/usr/bin/env python3
"""Backfill canonical episode_id (and scene_id placeholder) into shot JSON state.

Idempotent: re-running on an already-backfilled project is a no-op.

Usage:
  python3 recoil/pipeline/cli/backfill_state.py <project>          # backfill all
  python3 recoil/pipeline/cli/backfill_state.py <project> --field episode_id
  python3 recoil/pipeline/cli/backfill_state.py <project> --dry-run  # report only, write nothing

For projects whose project_config.json sets project_type=client_video, the
script exits without modifying any file; it does NOT synthesize episode_id
(those projects don't have one).
"""
from __future__ import annotations

import argparse
import json
import re
import sys

from recoil.core.paths import projects_root, ProjectPaths

# Episode prefix: "EP" followed by 1-4 digits (EP1, EP01, EP001, ...). The
# prefix must be followed by an underscore or be the entire stem.
EPISODE_PREFIX_RE = re.compile(r"^(EP\d{1,4})(?:_|$)")


def derive_episode_id(stem: str) -> str | None:
    """Extract the leading episode id (e.g. ``EP001``) from a filename stem.

    Returns None when the stem does not begin with a recognizable EP prefix.
    """
    found = EPISODE_PREFIX_RE.match(stem)
    if found is None:
        return None
    return found.group(1)


def _read_project_type(config_path, default: str = "microdrama") -> str:
    """Return ``project_type`` from the config file at `config_path`.

    Falls back to `default` when the file is missing, is not valid UTF-8 JSON,
    or does not contain a JSON object. Unreadable configs are reported on
    stderr rather than silently ignored, because the fallback decides whether
    state files get rewritten.
    """
    if not config_path.exists():
        return default
    try:
        config = json.loads(config_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(
            f"[backfill_state] WARNING: cannot parse {config_path} ({exc}); "
            f"assuming project_type={default}",
            file=sys.stderr,
        )
        return default
    if not isinstance(config, dict):
        # A top-level list/string/number has no .get(); treat it like a
        # malformed config instead of crashing with AttributeError.
        print(
            f"[backfill_state] WARNING: {config_path} is not a JSON object; "
            f"assuming project_type={default}",
            file=sys.stderr,
        )
        return default
    return config.get("project_type", default)


def backfill_project(project: str, field: str = "episode_id", *, dry_run: bool = False) -> int:
    """Populate `field` in every JSON state file of `project` that lacks it.

    Recursively scans ``ProjectPaths.for_project(project).visual_state_dir``
    for ``*.json`` files. For each file holding a JSON object whose `field`
    is missing or falsy, the value is set to the episode id derived from the
    filename stem and the file is rewritten with 2-space indentation.

    Files are skipped (with a message on stderr) when they are not valid
    UTF-8 JSON or when no episode id can be derived from the stem. Projects
    whose config declares ``project_type == "client_video"`` are left
    untouched.

    Args:
        project: Project name, resolved via ``ProjectPaths`` / ``projects_root``.
        field: Key to populate. The stored value is always the derived
            episode id, whatever the key is called.
        dry_run: When true, report what would be written without touching
            any file.

    Returns:
        Number of files actually written (always 0 for a dry run).
    """
    root = ProjectPaths.for_project(project).visual_state_dir
    if not root.exists():
        print(f"[backfill_state] No state dir at {root}; nothing to do", file=sys.stderr)
        return 0

    project_type = _read_project_type(projects_root() / project / "project_config.json")
    if project_type == "client_video":
        print(f"[backfill_state] {project} is client_video — no episode_id backfill needed")
        return 0

    written = 0
    # sorted() materializes the listing up front and gives a deterministic order.
    for jpath in sorted(root.rglob("*.json")):
        if not jpath.is_file():
            continue  # some state stores are directories named `*.json`
        try:
            # JSON is UTF-8 by specification; don't depend on the locale encoding.
            data = json.loads(jpath.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Undecodable bytes are treated like any other corrupt file so a
            # single bad file cannot abort the backfill halfway through.
            print(f"[backfill_state] SKIP corrupt JSON: {jpath}", file=sys.stderr)
            continue
        if not isinstance(data, dict):
            continue  # only JSON objects can carry the field
        if data.get(field):
            continue  # already populated
        derived = derive_episode_id(jpath.stem)
        if derived is None:
            print(f"[backfill_state] SKIP undecidable: {jpath.stem}", file=sys.stderr)
            continue
        data[field] = derived
        if dry_run:
            print(f"[backfill_state] DRY: would write {field}={derived} to {jpath}")
        else:
            jpath.write_text(json.dumps(data, indent=2), encoding="utf-8")
            written += 1
    print(f"[backfill_state] {project}: wrote {written} files (field={field})")
    return written


def main() -> int:
    """Parse the command line and run the backfill for the requested project.

    Always returns 0; the number of files written is not reflected in the
    exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("project")
    parser.add_argument("--field", default="episode_id")
    parser.add_argument("--dry-run", action="store_true")
    opts = parser.parse_args()

    backfill_project(
        opts.project,
        field=opts.field,
        dry_run=opts.dry_run,
    )
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
