#!/usr/bin/env python3
"""
consult.py — Multi-round architecture consultation tool.

Supports four engines:
  - codex (default): OpenAI GPT-5.x via the Codex CLI plan (no API cost,
    read-only repo grounding) — the free cross-lab second engine
  - opus: Claude Opus 4.8 via local tmux + subscription (or Anthropic SDK)
  - gemini: Gemini 3.1 Pro via Google GenAI SDK (PAID — opt-in 3rd-lab take)
  - opus-api: Claude Opus via the Anthropic API directly

Reasoning effort (opus/tmux engine only): low / medium / high / xhigh / max
passed straight through to `claude --effort`. Picks where the cost matters
(R2 deliberation, adversarial review) and stays low elsewhere.

Sends project context + questions to the chosen model, supports multi-round
back-and-forth with automatic context accumulation.

Usage:
    # Round 1: Gemini consultation (paid, opt-in — codex is the default engine)
    python3 tools/consult.py --engine gemini \
        --context consultations/topic/context.md \
        --prompt consultations/topic/round_1_prompt.md \
        --output consultations/topic/gemini_round_1.md

    # Round 1: Opus consultation
    python3 tools/consult.py --engine opus \
        --context consultations/topic/context.md \
        --prompt consultations/topic/round_1_prompt.md \
        --output consultations/topic/opus_round_1.md

    # Round N: Follow-up (auto-loads prior rounds from output dir)
    python3 tools/consult.py \
        --context consultations/topic/context.md \
        --follow-up consultations/topic/claude_round_1.md \
        --output consultations/topic/gemini_round_2.md \
        --prior-dir consultations/topic/

    # Quick single-round consultation
    python3 tools/consult.py \
        --context-text "Review this code..." \
        --output /dev/stdout
"""

import argparse
import json
import os
import re
import subprocess
import sys
import time
from datetime import date
from pathlib import Path

# Directory anchors derived from this file's location: the parent of this
# file's directory, then that directory's parent, then one level higher again.
PIPELINE_ROOT_LOCAL = Path(__file__).parent.parent
RECOIL_ROOT_LOCAL = PIPELINE_ROOT_LOCAL.parent
REPO_ROOT_LOCAL = RECOIL_ROOT_LOCAL.parent
# Bootstrap RECOIL_ROOT first so bare `from core.paths` resolves to recoil/core/
# (top-level), not recoil/pipeline/core/ (which would shadow it). Also put the
# REPO ROOT (parent of recoil/) on sys.path so the `recoil.`-prefixed imports
# below resolve when run as a bare script with no PYTHONPATH. Then
# ensure_pipeline_importable() adds PIPELINE_ROOT and locks the order.
if str(RECOIL_ROOT_LOCAL) not in sys.path:
    sys.path.insert(0, str(RECOIL_ROOT_LOCAL))
if str(REPO_ROOT_LOCAL) not in sys.path:
    sys.path.insert(1, str(REPO_ROOT_LOCAL))
from recoil.core.paths import ensure_pipeline_importable

ensure_pipeline_importable()
from recoil.core.model_profiles import get_model

# Optional cost ledger. The ledger directory is pushed onto sys.path so that
# `log_cost` can be imported; if the import fails, `_log_cost` is None and the
# engine functions below skip cost logging (they all test `_log_cost` first).
sys.path.insert(0, os.path.expanduser("~/CLAUDE_PROJECTS/cost-ledger"))
try:
    from log_cost import log_cost as _log_cost
except ImportError:
    _log_cost = None

# Model id used as the default `model` argument of run_consultation (the
# Gemini path); resolved once at import time from the model profiles.
DEFAULT_MODEL = get_model("consult", "text")

# --- Google Drive archival via gws CLI ---

# Name of the top-level Drive folder that archive_to_drive() finds or creates
# (under "root") before adding a per-consultation subfolder inside it.
DRIVE_CONSULTATIONS_FOLDER = "Consultations"


def _gws_run(args: list[str], quiet: bool = True) -> dict | None:
    """Run a gws CLI command and return parsed JSON, or None on failure."""
    cmd = ["gws"] + args
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        output = result.stdout.strip()
        # gws may prefix output with "Using keyring backend: keyring"
        if output.startswith("Using keyring"):
            output = output.split("\n", 1)[-1].strip()
        if output:
            return json.loads(output)
    except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError) as e:
        if not quiet:
            print(f"WARNING: gws command failed: {e}", file=sys.stderr)
    return None


def _find_or_create_folder(name: str, parent_id: str = "root") -> str | None:
    """Return the ID of the Drive folder `name` under `parent_id`, creating it if absent.

    A lookup is attempted first; when it yields no files (or the gws call
    fails), a create request is issued. Returns None if neither call produces
    an ID.
    """
    folder_mime = "application/vnd.google-apps.folder"

    # Step 1: look for an existing, non-trashed folder with this name.
    query = f'name = "{name}" and mimeType = "{folder_mime}" and "{parent_id}" in parents and trashed = false'
    list_params = json.dumps({"q": query, "fields": "files(id,name)", "pageSize": 1})
    listing = _gws_run(["drive", "files", "list", "--params", list_params])
    if listing and listing.get("files"):
        return listing["files"][0]["id"]

    # Step 2: nothing found — create the folder under the requested parent.
    metadata = json.dumps(
        {
            "name": name,
            "mimeType": folder_mime,
            "parents": [parent_id],
        }
    )
    created = _gws_run(
        [
            "drive",
            "files",
            "create",
            "--json",
            metadata,
            "--params",
            json.dumps({"fields": "id"}),
        ]
    )
    if not created or not created.get("id"):
        return None
    return created["id"]


def _upload_to_drive(
    local_path: Path, folder_id: str, drive_name: str = None
) -> str | None:
    """Upload a file to a Drive folder. Returns file ID or None.

    gws restricts uploads to files within cwd, so we cd to the file's directory.
    """
    local_path = local_path.resolve()
    name = drive_name or local_path.name
    body = {"name": name, "parents": [folder_id]}
    cmd = [
        "gws",
        "drive",
        "files",
        "create",
        "--json",
        json.dumps(body),
        "--upload",
        local_path.name,
        "--params",
        json.dumps({"fields": "id"}),
    ]
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=30, cwd=str(local_path.parent)
        )
        output = result.stdout.strip()
        if output.startswith("Using keyring"):
            output = output.split("\n", 1)[-1].strip()
        if output:
            parsed = json.loads(output)
            if parsed.get("id"):
                return parsed["id"]
    except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError):
        pass
    return None


def _upload_text_to_drive(text: str, filename: str, folder_id: str) -> str | None:
    """Write `text` to a temporary file and upload it to Drive as `filename`.

    Args:
        text: Content to upload.
        filename: Name the file gets in Drive; its suffix (default ".md") is
            reused for the temporary file.
        folder_id: Destination Drive folder ID.

    Returns:
        The Drive file ID, or None if the upload failed.

    The temporary file is written as UTF-8 regardless of the locale (the
    consult text routinely contains non-ASCII characters) and is always
    removed, including when the write itself fails.
    """
    import tempfile

    suffix = Path(filename).suffix or ".md"
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=suffix, delete=False, encoding="utf-8"
        ) as f:
            # Record the path before writing so a failed write cannot leak it.
            tmp_path = Path(f.name)
            f.write(text)
        return _upload_to_drive(tmp_path, folder_id, drive_name=filename)
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)


def archive_to_drive(
    topic: str, files: dict[str, str | Path], quiet: bool = False
) -> str | None:
    """Copy consultation artifacts into Drive under Consultations/{topic}-{date}/.

    Args:
        topic: Consultation topic; combined with today's ISO date to form the
            subfolder name.
        files: Mapping of {display_name: content_or_path}. Path values are
            uploaded as files (skipped if they do not exist); non-blank str
            values are uploaded as text; anything else is skipped.
        quiet: Suppress the stderr warnings and the final summary line.

    Returns:
        The Drive ID of the dated subfolder, or None if either folder could
        not be found or created.
    """

    def _warn(message: str) -> None:
        if not quiet:
            print(message, file=sys.stderr)

    # Top-level folder first, then the per-consultation dated folder inside it.
    root_id = _find_or_create_folder(DRIVE_CONSULTATIONS_FOLDER)
    if not root_id:
        _warn("WARNING: Could not create Consultations folder in Drive")
        return None

    folder_name = f"{topic}-{date.today().isoformat()}"
    folder_id = _find_or_create_folder(folder_name, parent_id=root_id)
    if not folder_id:
        _warn(f"WARNING: Could not create Drive folder: {folder_name}")
        return None

    # Upload every entry, counting only the ones Drive acknowledged with an ID.
    uploaded = 0
    for display_name, payload in files.items():
        file_id = None
        if isinstance(payload, Path):
            if payload.exists():
                file_id = _upload_to_drive(payload, folder_id, drive_name=display_name)
        elif isinstance(payload, str) and payload.strip():
            file_id = _upload_text_to_drive(payload, display_name, folder_id)
        if file_id:
            uploaded += 1

    _warn(
        f"Drive archive: {uploaded}/{len(files)} files → {DRIVE_CONSULTATIONS_FOLDER}/{folder_name}/"
    )
    return folder_id


def load_file(path: Path, label: str = None) -> str:
    """Return a file's text wrapped in a banner naming the file and its path.

    A missing file yields a "[FILE NOT FOUND: ...]" banner instead of raising.
    `label` overrides the displayed name, which otherwise is the filename.
    """
    rule = "=" * 60
    if not path.exists():
        return f"\n{rule}\n[FILE NOT FOUND: {path}]\n{rule}\n"
    body = path.read_text()
    heading = label or path.name
    return "\n".join(
        ["", rule, f"[FILE: {heading}]", f"[PATH: {path}]", rule, body, ""]
    )


def load_prior_rounds(prior_dir: Path) -> str:
    """Load all prior consultation responses for context accumulation.

    Walks round numbers upward from 1. For each round it takes the first
    consultant file that exists, in the order gemini_round_N.md,
    opus_round_N.md, codex_round_N.md, and then appends claude_round_N.md
    (the builder's reply) if present. The walk stops at the first round with
    no consultant file.

    Codex responses are included so that rounds produced by the codex engine
    do not silently truncate the accumulated history; the gemini/opus
    precedence is unchanged.

    Args:
        prior_dir: Directory holding the per-round markdown files.

    Returns:
        The concatenated, labeled transcript, or "" when round 1 has no
        consultant file.
    """
    # (filename stem, label shown in the transcript), in precedence order.
    engines = (("gemini", "GEMINI"), ("opus", "OPUS"), ("codex", "CODEX"))

    pieces = []
    round_num = 1
    while True:
        consultant_file = None
        consultant_label = None
        for stem, label in engines:
            candidate = prior_dir / f"{stem}_round_{round_num}.md"
            if candidate.exists():
                consultant_file = candidate
                consultant_label = label
                break

        if consultant_file is None:
            break

        pieces.append(
            f"\n\n--- {consultant_label} RESPONSE (Round {round_num}) ---\n\n"
        )
        pieces.append(consultant_file.read_text())

        claude_file = prior_dir / f"claude_round_{round_num}.md"
        if claude_file.exists():
            pieces.append(f"\n\n--- BUILDER RESPONSE (Round {round_num}) ---\n\n")
            pieces.append(claude_file.read_text())

        round_num += 1

    return "".join(pieces)


def auto_package_project(project_dir: Path, include_patterns: list[str] = None) -> str:
    """Bundle a project's key files and Python sources into one labeled string.

    Collected, in order: a fixed set of well-known files (when present), the
    top-level `*.py` files of lib/, orchestrator/, tools/ and editors/
    (excluding `__init__.py`), and any files matching the optional glob
    patterns. Each file is rendered through load_file(); sections are joined
    with newlines.
    """
    sections = []

    # Well-known files in standard locations.
    well_known = (
        ("CLAUDE.md", "Project instructions"),
        ("config/pipeline_config.json", "Project configuration"),
        ("config/model_profiles.json", "Model profiles"),
    )
    for rel_path, label in well_known:
        candidate = project_dir / rel_path
        if candidate.exists():
            sections.append(load_file(candidate, label))

    # Python sources directly inside the conventional source directories.
    for subdir in ("lib", "orchestrator", "tools", "editors"):
        source_dir = project_dir / subdir
        if not source_dir.exists():
            continue
        sections.extend(
            load_file(source, f"{subdir}/{source.name}")
            for source in sorted(source_dir.glob("*.py"))
            if source.name != "__init__.py"
        )

    # Caller-supplied glob patterns, relative to the project directory.
    for pattern in include_patterns or ():
        sections.extend(
            load_file(hit, str(hit.relative_to(project_dir)))
            for hit in sorted(project_dir.glob(pattern))
            if hit.is_file()
        )

    return "\n".join(sections)


def run_consultation(
    context: str,
    prompt: str,
    model: str = DEFAULT_MODEL,
    follow_up: str = None,
    prior_context: str = None,
    temperature: float = 0.7,
    max_tokens: int = 16384,
    grounding: bool = False,
) -> str:
    """Send a consultation prompt to Gemini and return the response text.

    Args:
        context: Project context appended under a "PROJECT CONTEXT" heading.
        prompt: The question/instructions; forms the start of the message.
        model: Gemini model id.
        follow_up: Builder's reply for the current round, appended last.
        prior_context: Transcript of earlier rounds.
        temperature: Sampling temperature passed to the API.
        max_tokens: Passed as `max_output_tokens`.
        grounding: When True, enables the Google Search tool.

    Returns:
        The model's response text.

    Exits the process with status 1 (after printing to stderr) when no API key
    is set, the API call raises, or the response carries no text at all.
    Cost-ledger logging is best-effort and never aborts the consultation.
    """
    from google import genai

    api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("ERROR: GOOGLE_API_KEY or GEMINI_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    client = genai.Client(api_key=api_key)

    # Build the full prompt
    full_prompt = prompt

    if context:
        full_prompt += "\n\n---\n\n# PROJECT CONTEXT\n\n" + context

    if prior_context:
        full_prompt += "\n\n---\n\n# PRIOR CONSULTATION ROUNDS\n\n" + prior_context

    if follow_up:
        full_prompt += "\n\n---\n\n# CLAUDE'S RESPONSE (current round)\n\n" + follow_up

    # Stats (token figure is a rough chars/4 estimate)
    char_count = len(full_prompt)
    est_tokens = char_count // 4
    print(f"Prompt: {char_count:,} chars (~{est_tokens:,} tokens)", file=sys.stderr)
    print(f"Model: {model}", file=sys.stderr)
    if grounding:
        print("Grounding: Google Search enabled", file=sys.stderr)
    print("Sending...", file=sys.stderr)

    config = {
        "temperature": temperature,
        "max_output_tokens": max_tokens,
    }
    if grounding:
        config["tools"] = [{"google_search": {}}]

    t0 = time.time()
    try:
        response = client.models.generate_content(
            model=model,
            contents=full_prompt,
            config=config,
        )
    except Exception as e:
        print(f"ERROR: API call failed: {e}", file=sys.stderr)
        sys.exit(1)

    elapsed = time.time() - t0
    response_text = response.text if hasattr(response, "text") else str(response)

    # Log cost before validating the text: tokens are billed even when the
    # response turns out to be empty.
    try:
        um = getattr(response, "usage_metadata", None)
        if um and _log_cost:
            in_t = getattr(um, "prompt_token_count", 0) or 0
            out_t = getattr(um, "candidates_token_count", 0) or 0
            g_cost = (in_t * 1.25 + out_t * 10.00) / 1_000_000
            _log_cost(
                "consult",
                "gemini",
                model,
                g_cost,
                units="tokens",
                count=in_t + out_t,
                context=model,
            )
            print(f"  ({in_t:,} in / {out_t:,} out, ~${g_cost:.3f})", file=sys.stderr)
    except Exception as e:
        # Best-effort: a ledger failure must not lose the consultation, but it
        # should not vanish silently either.
        print(f"WARNING: cost logging failed: {e}", file=sys.stderr)

    # `response.text` can be None (e.g. no candidate text was produced). Fail
    # with a clear message instead of a TypeError from len(None) below.
    if response_text is None:
        print(
            "ERROR: Gemini returned no text (empty or blocked response)",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Response: {len(response_text):,} chars in {elapsed:.1f}s", file=sys.stderr)
    return response_text


# Default Claude Opus model id; used as the default `model` argument of both
# Opus engine functions in this file.
# NOTE(review): the trailing comment says 4-8 is "3x cheaper than 4-7", but the
# pricing table below lists identical rates for 4-7 and 4-8 — confirm which
# statement is accurate.
OPUS_MODEL = "claude-opus-4-8"  # Updated 2026-05-28: 4-6 → 4-8. 3x cheaper than 4-7, 4x less likely to miss code flaws.

# Per-million-token rates for opus-api cost-ledger logging. Verified against
# https://platform.claude.com/docs/en/about-claude/pricing on 2026-05-28.
# Standard mode only (no fast-mode multiplier, no caching, no batch discount).
# Opus 4.5–4.8 all share $5 / $25; Opus 4.1 and earlier were $15 / $75.
# Adding a model = one row. Unknown models → no cost logged (better than wrong).
# Each value is an (input_rate, output_rate) pair in USD per million tokens;
# lookup is by exact model id.
OPUS_PRICING_PER_M_USD = {
    "claude-opus-4-8": (5.0, 25.0),
    "claude-opus-4-7": (5.0, 25.0),
    "claude-opus-4-6": (5.0, 25.0),
    "claude-opus-4-5": (5.0, 25.0),
    "claude-opus-4-1": (15.0, 75.0),
}


def _opus_label_from_model(model: str) -> str:
    """Friendly label for log scrapers: 'claude-opus-4-8' → 'Opus 4.8'.

    Falls back to the raw model id (or '<unknown>' if model is None/empty)
    if the pattern doesn't match. Anchors on the first two version segments
    so suffixes like 'claude-opus-4-9-preview' produce 'Opus 4.9' (still
    greppable as `Opus 4\\.\\d`), not 'Opus 4.9.preview'.
    """
    if not model:
        return "<unknown>"
    prefix = "claude-opus-"
    if model.startswith(prefix):
        tail = model[len(prefix) :]
        parts = tail.split("-")
        # Take the first two segments as the version (major.minor); discard suffix.
        if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit():
            return f"Opus {parts[0]}.{parts[1]}"
        # Single-segment or non-numeric tail — return as-is, dots not dashes.
        return f"Opus {tail.replace('-', '.')}"
    return model


# Prepended to every Opus user message. The opus engine captures `claude --print`
# stdout verbatim; if the model role-plays "I wrote the deliverable to <file>" and
# emits only a sign-off, that sign-off becomes the entire saved artifact (the recurring
# truncated-capture bug — root-caused 2026-06-06). This contract forces the full
# deliverable into the reply itself. Codex (structured --output-last-message) is immune
# and uses a different code path, so this is correctly scoped to Opus only.
# NOTE(review): as written, run_codex_consultation builds its message through
# _build_opus_user_content and therefore also receives this contract, while
# run_opus_consultation (the API path) assembles its message inline without it —
# confirm whether the scoping described above is what is actually intended.
_OPUS_OUTPUT_CONTRACT = (
    "# OUTPUT CONTRACT (read first — non-negotiable)\n"
    "Your entire reply is captured verbatim and saved as the consultation artifact. "
    "There is NO other file. Therefore:\n"
    "- Emit the COMPLETE deliverable inline as your reply — every section, in full.\n"
    "- Do NOT use tools and do NOT write or save any file yourself. Your message IS the "
    "file.\n"
    "- Do NOT reply with a summary, a sign-off, or a note that the work is \"written to\" / "
    "\"saved to\" / \"in the file\" / \"at the bottom of the file.\" If it is not in this "
    "reply, it does not exist.\n\n---\n\n"
)


def _build_opus_user_content(prompt, context, prior_context, follow_up):
    """Compose the user message: output contract, prompt, then optional sections.

    Each of context, prior rounds and the builder's follow-up is appended under
    its own heading, in that order, and only when non-empty.
    """
    optional_sections = (
        ("\n\n---\n\n# PROJECT CONTEXT\n\n", context),
        ("\n\n---\n\n# PRIOR CONSULTATION ROUNDS\n\n", prior_context),
        ("\n\n---\n\n# BUILDER'S RESPONSE (current round)\n\n", follow_up),
    )
    message = _OPUS_OUTPUT_CONTRACT + prompt
    for heading, body in optional_sections:
        if body:
            message += heading + body
    return message


# claude CLI accepts these effort levels as of 2026-05-28. Source of truth is
# `claude --help`; if it diverges, update this tuple. The CONSULT_EXTRA_EFFORTS
# env var lets a power user add forward-compat values without editing this file
# (comma-separated, e.g. CONSULT_EXTRA_EFFORTS=extreme,ultra).
_CORE_EFFORTS = ("low", "medium", "high", "xhigh", "max")
# Parsed once at import time; entries are whitespace-stripped and blank
# entries are dropped, so an unset or empty variable yields an empty tuple.
_EXTRA_EFFORTS = tuple(
    e.strip()
    for e in os.environ.get("CONSULT_EXTRA_EFFORTS", "").split(",")
    if e.strip()
)
# Everything the tmux Opus engine accepts for `effort` (core + env extras).
VALID_EFFORTS = _CORE_EFFORTS + _EXTRA_EFFORTS

# Codex (GPT-5.x) reasoning_effort scale is distinct from claude's --effort scale.
# Codex tops out at "high"; claude's xhigh/max have no codex equivalent and clamp
# down to "high". Consults are high-stakes reasoning, so codex defaults to "high".
_CODEX_EFFORTS = ("minimal", "low", "medium", "high")
# Level used by _resolve_codex_effort when no effort is supplied.
_CODEX_DEFAULT_EFFORT = "high"


def _resolve_codex_effort(effort):
    """Translate a --effort value into a codex model_reasoning_effort level.

    Returns a (level, clamped) pair:
      - no effort given       -> (codex default, False)
      - a native codex level  -> (that level, False)
      - any other value       -> ("high", True), i.e. clamped to codex's ceiling
    """
    if effort and effort not in _CODEX_EFFORTS:
        return "high", True
    return (effort or _CODEX_DEFAULT_EFFORT), False

# Per-effort tmux poll-timeout defaults (seconds). max/xhigh routinely exceed
# the prior 1800s ceiling — they need real headroom or the watchdog kills the
# run mid-thinking. CLI --timeout overrides these.
# The None key covers calls that pass no effort at all.
_DEFAULT_TIMEOUT_BY_EFFORT = {
    None: 1800,
    "low": 1800,
    "medium": 1800,
    "high": 2400,
    "xhigh": 3600,
    "max": 5400,
}


def run_opus_consultation_tmux(
    context: str,
    prompt: str,
    model: str = OPUS_MODEL,
    follow_up: str = None,
    prior_context: str = None,
    temperature: float = 0.7,  # ignored — claude CLI does not accept temperature
    max_tokens: int = 16384,  # ignored — claude CLI manages its own budget
    effort: str = None,
    timeout: int = None,
) -> str:
    """Run Opus consultation via a local tmux session wrapping `claude --print`.

    Uses the user's Claude subscription (OAuth keychain), NOT the Anthropic API.
    ANTHROPIC_API_KEY is removed both from the env handed to the tmux client
    and, via `unset`, inside the shell command itself. The second step matters
    because a tmux server that is already running supplies its own environment
    to new sessions, so stripping the client env alone is not sufficient.
    The session is watchable via `tmux attach -t <name>`.

    Args:
        context, prompt, follow_up, prior_context: Assembled into the user
            message by _build_opus_user_content.
        model: Model id passed to `claude --model`.
        temperature, max_tokens: Accepted for signature parity; ignored.
        effort: optional reasoning effort passed to `claude --effort` (low/
            medium/high/xhigh/max, plus any CONSULT_EXTRA_EFFORTS additions).
            Validated here so programmatic callers (not just argparse) get a
            clear error on bad input. Omitting it uses the CLI's default.
        timeout: poll-timeout seconds. If None, picks a default from effort
            (high → 2400, xhigh → 3600, max → 5400, else 1800).

    Returns:
        Everything the command wrote to its output file (stdout and stderr
        are both redirected there).

    Raises:
        ValueError: `effort` is not an allowed level.
        TimeoutError: the done marker did not appear within `timeout`; the
            partial output file is preserved and named in the message.
        RuntimeError: the run finished but produced no output file.
        OSError / subprocess.CalledProcessError: tmux could not be started.
    """
    import shlex
    import uuid

    if effort is not None and effort not in VALID_EFFORTS:
        raise ValueError(
            f"Invalid effort {effort!r}. Allowed: {VALID_EFFORTS}. "
            f"Add to CONSULT_EXTRA_EFFORTS if the claude CLI gained a new level."
        )

    if timeout is None:
        # For known effort levels, use the table. For CONSULT_EXTRA_EFFORTS
        # entries (presumed to be future levels at or above `max`), use the
        # max-tier timeout so the watchdog doesn't kill them prematurely.
        if effort in _DEFAULT_TIMEOUT_BY_EFFORT:
            timeout = _DEFAULT_TIMEOUT_BY_EFFORT[effort]
        elif effort in _EXTRA_EFFORTS:
            timeout = _DEFAULT_TIMEOUT_BY_EFFORT["max"]
        else:
            timeout = 1800

    user_content = _build_opus_user_content(prompt, context, prior_context, follow_up)
    char_count = len(user_content)
    est_tokens = char_count // 4

    session = f"consult-opus-{uuid.uuid4().hex[:8]}"
    prompt_file = Path(f"/tmp/{session}-prompt.md")
    output_tmp = Path(f"/tmp/{session}-out.md")
    done_file = Path(f"/tmp/{session}-done")

    def _discard(*paths: Path) -> None:
        """Remove temp files, ignoring ones that are already gone."""
        for stale in paths:
            stale.unlink(missing_ok=True)

    def _kill_session() -> None:
        subprocess.run(["tmux", "kill-session", "-t", session], check=False)

    # Explicit UTF-8: the output contract alone contains non-ASCII characters,
    # so a non-UTF-8 locale default would fail here.
    prompt_file.write_text(user_content, encoding="utf-8")

    effort_label = f", effort={effort}" if effort else ""
    print(f"Prompt: {char_count:,} chars (~{est_tokens:,} tokens)", file=sys.stderr)
    print(
        f"Engine: {_opus_label_from_model(model)} via tmux/subscription "
        f"(model={model}{effort_label}, timeout={timeout}s, session: {session})",
        file=sys.stderr,
    )
    print(f"Attach to watch: tmux attach -t {session}", file=sys.stderr)
    print("Sending...", file=sys.stderr)

    effort_flag = f"--effort {shlex.quote(effort)} " if effort else ""

    # Shell command the tmux session will run.
    # unset the API key → stdin from prompt file → claude --print → stdout and
    # stderr to the output file; touch done_file on completion so the parent
    # can poll.
    cmd = (
        f"unset ANTHROPIC_API_KEY; "
        f"claude --print --permission-mode bypassPermissions "
        f"--model {shlex.quote(model)} "
        f"{effort_flag}"
        f"< {shlex.quote(str(prompt_file))} "
        f"> {shlex.quote(str(output_tmp))} 2>&1; "
        f"touch {shlex.quote(str(done_file))}"
    )

    # Strip ANTHROPIC_API_KEY from env so claude uses subscription OAuth
    # (covers the case where this call is what starts the tmux server).
    env = os.environ.copy()
    env.pop("ANTHROPIC_API_KEY", None)

    t0 = time.time()
    try:
        subprocess.run(
            [
                "tmux",
                "new-session",
                "-d",
                "-s",
                session,
                "-c",
                str(Path.home()),
                "sh",
                "-c",
                cmd,
            ],
            env=env,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # tmux missing or refused to start: nothing will ever consume the
        # prompt file, so don't leave it behind.
        _discard(prompt_file)
        raise

    # Poll for done file. Timeout was set above based on effort or explicit param.
    poll_interval = 3
    while time.time() - t0 < timeout:
        if done_file.exists():
            break
        time.sleep(poll_interval)
    else:
        _kill_session()
        # Preserve output_tmp for forensics, but clean up prompt + done marker.
        # output_tmp path is surfaced in the error so the caller can fetch it.
        _discard(prompt_file, done_file)
        raise TimeoutError(
            f"Opus tmux consult '{session}' timed out after {timeout}s. "
            f"Check partial output at {output_tmp} (preserved); prompt + done marker "
            f"cleaned up. Bump timeout via --timeout if this is an xhigh/max run."
        )

    elapsed = time.time() - t0

    if not output_tmp.exists():
        _kill_session()
        # Rare-but-reachable path (done_file existed → loop exited →
        # output_tmp missing); still clean up the prompt + done marker.
        _discard(prompt_file, done_file)
        raise RuntimeError(f"Output file {output_tmp} missing after tmux run completed")

    # Decode leniently so a stray invalid byte cannot discard a finished run.
    response_text = output_tmp.read_text(encoding="utf-8", errors="replace")

    # Clean up tmux session + temp files
    _kill_session()
    _discard(prompt_file, output_tmp, done_file)

    print(
        f"Response: {len(response_text):,} chars in {elapsed:.1f}s "
        f"(subscription — no API cost)",
        file=sys.stderr,
    )

    return response_text


def run_opus_consultation(
    context: str,
    prompt: str,
    model: str = OPUS_MODEL,
    follow_up: str = None,
    prior_context: str = None,
    temperature: float = 0.7,
    max_tokens: int = 16384,
) -> str:
    """Run a consultation against Claude Opus through the Anthropic API.

    Authenticates with ANTHROPIC_API_KEY (pay-per-token); the subscription
    route is run_opus_consultation_tmux. The reply is streamed and returned as
    one string. Extended thinking is not exposed on this path — it runs at
    standard inference.

    Exits the process with status 1 when the key is missing or the API call
    fails. Cost is written to the ledger only for models that have a row in
    OPUS_PRICING_PER_M_USD; otherwise a warning is printed and nothing is
    logged.
    """
    import anthropic

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        print("ERROR: ANTHROPIC_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    client = anthropic.Anthropic(api_key=api_key)

    # Assemble the message: prompt first, then each non-empty section.
    optional_sections = (
        ("\n\n---\n\n# PROJECT CONTEXT\n\n", context),
        ("\n\n---\n\n# PRIOR CONSULTATION ROUNDS\n\n", prior_context),
        ("\n\n---\n\n# BUILDER'S RESPONSE (current round)\n\n", follow_up),
    )
    user_content = prompt
    for heading, body in optional_sections:
        if body:
            user_content += heading + body

    # Stats (token figure is a rough chars/4 estimate)
    char_count = len(user_content)
    print(
        f"Prompt: {char_count:,} chars (~{char_count // 4:,} tokens)",
        file=sys.stderr,
    )
    print(
        f"Engine: {_opus_label_from_model(model)} via Anthropic API ({model})",
        file=sys.stderr,
    )
    print("Sending...", file=sys.stderr)

    started = time.time()
    chunks = []
    in_tokens = 0
    out_tokens = 0
    try:
        with client.messages.stream(
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=[{"role": "user", "content": user_content}],
        ) as stream:
            for piece in stream.text_stream:
                chunks.append(piece)
            usage = stream.get_final_message().usage
            in_tokens = getattr(usage, "input_tokens", 0)
            out_tokens = getattr(usage, "output_tokens", 0)
    except Exception as e:
        print(f"ERROR: Anthropic API call failed: {e}", file=sys.stderr)
        sys.exit(1)
    response_text = "".join(chunks)

    elapsed = time.time() - started

    # Price the call only when the model has a known rate row.
    cost = None
    rates = OPUS_PRICING_PER_M_USD.get(model)
    if rates is not None:
        in_rate, out_rate = rates
        cost = (in_tokens * in_rate + out_tokens * out_rate) / 1_000_000
        if _log_cost:
            _log_cost(
                "consult",
                "anthropic",
                model,
                cost,
                units="tokens",
                count=in_tokens + out_tokens,
                context=model,
            )
    else:
        print(
            f"WARNING: no pricing entry for model {model!r} in OPUS_PRICING_PER_M_USD; "
            f"skipping cost-ledger log to avoid silent miscount.",
            file=sys.stderr,
        )

    if cost is None:
        cost_str = "cost unlogged"
    else:
        cost_str = f"~${cost:.3f}"
    print(
        f"Response: {len(response_text):,} chars in {elapsed:.1f}s "
        f"({in_tokens:,} in / {out_tokens:,} out, {cost_str})",
        file=sys.stderr,
    )
    return response_text


def run_codex_consultation(
    context: str,
    prompt: str,
    model: str = None,
    follow_up: str = None,
    prior_context: str = None,
    temperature: float = 0.7,  # ignored — codex CLI manages its own budget
    max_tokens: int = 16384,   # ignored
    effort: str = None,
    timeout: int = 3600,
    output_schema: str | Path | None = None,
    cwd: str | Path | None = None,
) -> str:
    """Run a consultation via the Codex CLI (`codex exec`), non-interactively.

    Uses the user's Codex plan auth (no per-use API cost), giving a distinct-lab
    perspective (OpenAI GPT-5.x) alongside the Opus (Anthropic) engine — the
    cross-lab diversity that makes dual-engine consults catch each other's blind
    spots, but without Gemini's paid Google API. The CLI runs with a read-only
    sandbox so the model can ground against the tree without writing to it.
    The final answer is captured via `-o` (output-last-message) into a temp
    file that is always removed afterwards.

    Args:
        context, prompt, follow_up, prior_context: Assembled into the message.
        model: Optional model id passed with `-m`; omitted → CLI default.
        temperature, max_tokens: Accepted for signature parity; ignored.
        effort: Claude-scale effort; mapped by _resolve_codex_effort (values
            above codex's ceiling are clamped to "high").
        timeout: Per-attempt subprocess timeout in seconds.
        output_schema: Optional path passed with `--output-schema`.
        cwd: Directory passed with `-C` for the model to ground against;
            defaults to REPO_ROOT_LOCAL.

    Returns:
        The stripped text of the first non-degenerate response.

    Raises:
        RuntimeError: codex timed out, the CLI is not on PATH, or both
            attempts produced only a degenerate/stub response.
    """
    import uuid

    # NOTE(review): this helper also prepends the Opus output contract (which
    # includes a "Do NOT use tools" line) ahead of the read-files framing
    # below — confirm that combination is intended for codex.
    user_content = _build_opus_user_content(prompt, context, prior_context, follow_up)
    framed = (
        "You are a senior software architect acting as a consultation engine. "
        "Read the CONTEXT and PROMPT below. You MAY read files in the working "
        "directory (read-only) to GROUND your analysis against the ACTUAL code — "
        "and you SHOULD wherever a recommendation or verdict hinges on what the "
        "code/config truly does. Do not trust the context doc's summaries on "
        "load-bearing points; open the relevant file and verify. Do NOT modify, "
        "create, or delete anything. Respond with your full written analysis — "
        "exactly the deliverable the prompt asks for, in markdown.\n\n---\n\n"
        + user_content
    )
    char_count = len(framed)
    out_file = Path(f"/tmp/consult-codex-{uuid.uuid4().hex[:8]}.md")

    codex_effort, _clamped = _resolve_codex_effort(effort)

    print(f"Prompt: {char_count:,} chars (~{char_count // 4:,} tokens)", file=sys.stderr)
    print(
        f"Engine: Codex CLI ({model or 'plan default, e.g. gpt-5.x'}, "
        f"reasoning_effort={codex_effort}) — read-only, no API cost (user plan auth)",
        file=sys.stderr,
    )
    if _clamped:
        print(
            f"  (--effort {effort!r} has no codex level above 'high' — clamped to 'high')",
            file=sys.stderr,
        )
    print("Sending to codex exec...", file=sys.stderr)

    cmd = [
        "codex", "exec",
        "--skip-git-repo-check",
        "-s", "read-only",
        "--ephemeral",
        "-c", f"model_reasoning_effort={codex_effort}",  # explicit effort + provenance
        # tree the model grounds against (read-only); defaults to this repo's
        # root, but callers (e.g. nightwatch verify --repo-root) can point it at
        # the checkout they're actually verifying.
        "-C", str(cwd) if cwd is not None else str(REPO_ROOT_LOCAL),
        "-o", str(out_file),
    ]
    if model:
        cmd += ["-m", model]
    if output_schema is not None:
        cmd += ["--output-schema", str(output_schema)]
    cmd += ["-"]               # read the prompt from stdin

    # Two-attempt loop. codex (gpt-5.x, agentic) sometimes spends its whole turn
    # GROUNDING against the repo and emits only a tiny VERIFICATION/self-audit
    # footer with no deliverable (the degenerate-stub failure mode). Attempt 1
    # allows full grounding — that is VALUABLE (it catches stale context/specs and
    # must not be defeated). If attempt 1 returns a stub, attempt 2 prepends a hard
    # output-forcing directive: the model already grounded, so "stop reading, write
    # the full deliverable now" reliably produces the answer. This is the real fix
    # for the stub mode — it replaces the manual re-fire and removes any need for
    # the empty-`--codex-cwd` workaround (which would suppress grounding entirely).
    force_output_preamble = (
        "STOP. You have already grounded against the code. Do NOT read any more "
        "files. Your ENTIRE response must now be the COMPLETE written deliverable "
        "the prompt below asks for — the full analysis/findings/design in markdown, "
        "not a summary of what you explored, not a verification or self-audit block. "
        "Begin the deliverable immediately.\n\n---\n\n"
    )
    last_returncode = None
    last_stderr = ""
    for attempt in range(2):
        send = framed if attempt == 0 else force_output_preamble + framed
        t0 = time.time()
        try:
            try:
                proc = subprocess.run(
                    cmd, input=send, capture_output=True, text=True, timeout=timeout
                )
            except subprocess.TimeoutExpired as err:
                raise RuntimeError(f"codex exec timed out after {timeout}s") from err
            except FileNotFoundError as err:
                raise RuntimeError(
                    "codex CLI not found on PATH — install/login to Codex first."
                ) from err

            elapsed = time.time() - t0
            text = out_file.read_text().strip() if out_file.exists() else ""
        finally:
            # Always drop the capture file — including on the timeout path,
            # where a partially written file could otherwise be left in /tmp.
            try:
                out_file.unlink()
            except OSError:
                pass
        last_returncode = proc.returncode
        last_stderr = proc.stderr or ""

        if text and not _is_degenerate_response(text):
            tag = " [attempt 2: output-forced after stub]" if attempt else ""
            print(
                f"Response: {len(text):,} chars in {elapsed:.1f}s (Codex plan — no API cost){tag}",
                file=sys.stderr,
            )
            return text

        if attempt == 0:
            print(
                f"  codex returned a degenerate/stub response ({len(text)} chars) — retrying "
                "once with an output-forcing directive (grounding preserved on attempt 1)...",
                file=sys.stderr,
            )

    raise RuntimeError(
        f"codex exec produced no substantive deliverable after 2 attempts "
        f"(last exit {last_returncode}); the engine emitted only a grounding/verification "
        f"stub both times.\nstderr tail:\n{last_stderr[-1500:]}"
    )


def _is_degenerate_response(text: str) -> bool:
    """True if a consult response has no substantive body — a FAILED consult
    masquerading as success. Guards the silent-failure mode where an agentic engine
    (codex in read-only mode) burns its turn reading files and emits ONLY a
    VERIFICATION/self-audit footer with no actual answer (the 2026-06-05 766-char
    no-design incident). Engine-agnostic.
    """
    stripped = (text or "").strip()
    if len(stripped) < 200:
        return True  # near-empty from any engine
    # If a VERIFICATION self-audit footer is present, the body BEFORE the LAST such
    # header must itself be substantive — else the model emitted only its self-audit.
    matches = list(re.finditer(r"(?im)^\s*#{0,6}\s*VERIFICATION\s*$", stripped))
    if matches:
        body = stripped[: matches[-1].start()].strip()
        if len(body) < 400:
            return True
    # Sign-off signature: an agentic engine (notably opus via `claude --print`) that
    # role-plays writing the deliverable to a file and emits only a short note —
    # "written to <file>" / "at the bottom of the file" — which gets captured as the
    # whole artifact (root-caused 2026-06-06; the 1064-char failure passed the length
    # checks above). Gate on length so legitimate long deliverables that merely mention
    # a file in prose are never flagged.
    if len(stripped) < 2000:
        for pat in (
            # "written to … opus_round_2.md" / "complete at … round_1" — require an
            # OUTPUT-ARTIFACT reference (.md / round_N / "the file"), not any path, so
            # design prose like "lease saved to `.session-lease.json`" is not flagged.
            r"(?i)\b(written|wrote|saved|complete[d]?)\b[^.\n]{0,80}\b(to|at|in)\b"
            r"[^.\n]{0,80}(\.md\b|round[_ ]?\d|the (file|document))",
            r"(?i)at the bottom of (the|this) (file|document)",
        ):
            if re.search(pat, stripped):
                return True
    return False


def main():
    """CLI entry point: run one consultation round and persist the response.

    Steps, in order:
      1. Parse arguments.
      2. Assemble context (--context / --context-text / --auto-package), the
         prompt (--prompt / --prompt-text, required), the optional follow-up
         and the prior-round transcript (--prior-dir / --prior).
      3. Resolve the model from the engine unless --model overrides it.
      4. Validate --effort / --timeout against the chosen engine.
      5. With --dry-run, print a summary of what would be sent and return.
      6. Dispatch to the engine-specific runner.
      7. Reject degenerate (no-body) responses, write the output file, archive
         to Google Drive unless disabled, and echo the response to stdout.

    Exit codes:
      1 — neither --prompt nor --prompt-text was supplied.
      2 — --effort used on an engine that does not support it, or the engine
          returned a degenerate response.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Multi-round architecture consultation "
            "(Codex GPT-5.x, Opus 4.8 or Gemini 3.1 Pro)"
        )
    )
    parser.add_argument("--context", type=str, help="Path to context bundle file")
    parser.add_argument(
        "--context-text",
        type=str,
        help="Inline context text (alternative to --context)",
    )
    parser.add_argument(
        "--auto-package", type=str, help="Auto-package project directory as context"
    )
    parser.add_argument("--prompt", type=str, help="Path to prompt/framing file")
    parser.add_argument(
        "--prompt-text", type=str, help="Inline prompt text (alternative to --prompt)"
    )
    parser.add_argument(
        "--follow-up", type=str, help="Path to Claude's response for follow-up round"
    )
    parser.add_argument(
        "--prior-dir", type=str, help="Directory containing prior round transcripts"
    )
    parser.add_argument(
        "--output",
        type=str,
        required=True,
        help="Path to save the engine's response",
    )
    parser.add_argument(
        "--engine",
        type=str,
        choices=["gemini", "opus", "opus-api", "codex"],
        default="codex",
        help=(
            "Consultation engine. 'codex' (default) runs the Codex CLI (OpenAI "
            "GPT-5.x) via the user's plan — no API cost, distinct-lab perspective, "
            "grounds read-only against the live repo. 'opus' runs Claude Opus via "
            "local tmux + subscription (no API cost; --effort supported here). "
            "'gemini' uses Google API (PAID — a 3rd-lab take, opt in explicitly). "
            "'opus-api' uses the Anthropic API directly."
        ),
    )
    parser.add_argument(
        "--codex-cwd",
        type=str,
        default=None,
        help=(
            "Working dir codex grounds against (read-only). Default: the live "
            "repo root. Point at a NEUTRAL/empty dir for a self-contained DESIGN "
            "consult whose context is complete — stops codex burning its turn "
            "exploring a large tree and emitting only a VERIFICATION footer "
            "(the no-design failure mode). codex engine only; no effect otherwise."
        ),
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help=f"Override model (default: auto from engine — Gemini: {DEFAULT_MODEL}, Opus: {OPUS_MODEL})",
    )
    parser.add_argument(
        "--effort",
        type=str,
        default=None,
        choices=list(VALID_EFFORTS),
        help=(
            "Reasoning effort. opus engine: low/medium/high/xhigh/max -> claude "
            "--effort (default: CLI session default). codex engine: mapped to "
            "model_reasoning_effort (minimal/low/medium/high; xhigh/max clamp to "
            "high; default high). ERROR on gemini and opus-api. Extend the claude "
            "scale via CONSULT_EXTRA_EFFORTS."
        ),
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=None,
        help=(
            "Timeout seconds. opus/tmux engine: poll-timeout; default depends on "
            "--effort: high → 2400, xhigh → 3600, max → 5400, else 1800. "
            "codex engine: timeout for the codex subprocess; default 3600. "
            "Ignored (with a warning) on gemini and opus-api. "
            "Bump if you see TimeoutError on real max-effort runs."
        ),
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7, help="Temperature (default: 0.7)"
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=65536,
        help="Max output tokens (default: 65536)",
    )
    parser.add_argument(
        "--include",
        type=str,
        nargs="*",
        help="Additional glob patterns to include (with --auto-package)",
    )
    parser.add_argument(
        "--prior",
        type=str,
        help="Comma-separated paths to prior round files (alternative to --prior-dir)",
    )
    parser.add_argument(
        "--system-prompt",
        type=str,
        help="Persona/system prompt to prepend (for council and adversarial modes)",
    )
    parser.add_argument(
        "--json-output",
        action="store_true",
        help="Request structured JSON output (for judge scoring calls)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be sent without calling API",
    )
    parser.add_argument(
        "--grounding",
        action="store_true",
        help="Enable Google Search grounding for Gemini (lets Gemini research the web)",
    )
    parser.add_argument(
        "--no-drive", action="store_true", help="Skip Google Drive archival"
    )
    parser.add_argument(
        "--topic",
        type=str,
        default=None,
        help="Consultation topic (for Drive folder naming; auto-detected from --output path)",
    )
    args = parser.parse_args()

    # Build context (first source given wins: file, inline text, auto-package)
    context = ""
    if args.context:
        context = Path(args.context).read_text()
    elif args.context_text:
        context = args.context_text
    elif args.auto_package:
        context = auto_package_project(
            Path(args.auto_package),
            include_patterns=args.include,
        )

    # Build prompt (mandatory: a file or inline text)
    prompt = ""
    if args.prompt:
        prompt = Path(args.prompt).read_text()
    elif args.prompt_text:
        prompt = args.prompt_text
    else:
        print("ERROR: --prompt or --prompt-text required", file=sys.stderr)
        sys.exit(1)

    # Load follow-up
    follow_up = None
    if args.follow_up:
        follow_up = Path(args.follow_up).read_text()

    # Load prior rounds
    prior_context = None
    if args.prior_dir:
        prior_context = load_prior_rounds(Path(args.prior_dir))
    elif args.prior:
        # Comma-separated list of prior round files; missing ones are skipped
        # with a warning rather than aborting the run.
        parts = []
        for fpath in args.prior.split(","):
            fpath = fpath.strip()
            p = Path(fpath)
            if p.exists():
                parts.append(f"\n--- PRIOR: {p.name} ---\n\n{p.read_text()}")
            else:
                print(f"WARNING: prior file not found: {fpath}", file=sys.stderr)
        if parts:
            prior_context = "\n".join(parts)

    # Resolve model from engine if not explicitly set
    if args.model is None:
        if args.engine in ("opus", "opus-api"):
            model = OPUS_MODEL
        elif args.engine == "codex":
            model = None  # let the Codex CLI use its own default (gpt-5.x)
        else:
            model = DEFAULT_MODEL
    else:
        model = args.model

    # Prepend system prompt (persona) if provided
    if args.system_prompt:
        prompt = args.system_prompt + "\n\n---\n\n" + prompt

    # Append JSON output instruction if requested
    if args.json_output:
        prompt += "\n\n---\n\nIMPORTANT: Respond with valid JSON only. No markdown fences, no commentary outside the JSON object."

    # --effort applies to the opus/tmux path (claude CLI flag) and to codex
    # (model_reasoning_effort). On gemini and opus-api it would be silently
    # dropped — hard-error instead, because silent drop on a flag the user paid
    # attention to is worse than an explicit failure. This also keeps dry-run
    # stderr clean (was a warning firing before, breaking harness phases that
    # assert clean stderr).
    if args.effort and args.engine in ("gemini", "opus-api"):
        print(
            f"ERROR: --effort {args.effort} is not supported on engine "
            f"'{args.engine}'. Use it with 'opus' (-> claude --effort) or 'codex' "
            f"(-> model_reasoning_effort). For opus-api reasoning, use the Anthropic "
            f"SDK `thinking` parameter directly (not yet wired through consult.py).",
            file=sys.stderr,
        )
        sys.exit(2)

    # --timeout is honored by opus/tmux (poll-timeout) and by codex (passed to
    # run_codex_consultation below). Warn (not fatal) only on the engines that
    # really drop it; warning on codex was false and polluted stderr.
    if args.timeout is not None and args.engine not in ("opus", "codex"):
        print(
            f"WARNING: --timeout {args.timeout} is ignored on engine "
            f"'{args.engine}' (only the opus/tmux and codex engines take a timeout).",
            file=sys.stderr,
        )

    if args.dry_run:
        full = prompt
        if context:
            full += "\n\n[CONTEXT: " + str(len(context)) + " chars]\n"
        if prior_context:
            full += "\n\n[PRIOR ROUNDS: " + str(len(prior_context)) + " chars]\n"
        if follow_up:
            full += "\n\n[FOLLOW-UP: " + str(len(follow_up)) + " chars]\n"

        # Compute effective timeout (only printed for the opus engine).
        # Mirror the live-path logic so dry-run shows what production will use.
        if args.timeout is not None:
            eff_timeout = args.timeout
        elif args.effort in _DEFAULT_TIMEOUT_BY_EFFORT:
            eff_timeout = _DEFAULT_TIMEOUT_BY_EFFORT[args.effort]
        elif args.effort in _EXTRA_EFFORTS:
            eff_timeout = _DEFAULT_TIMEOUT_BY_EFFORT["max"]
        else:
            eff_timeout = 1800

        # Dry-run output is structurally identical across engines — always print
        # Effort and Timeout lines (with N/A for non-opus) so downstream parsers
        # see a consistent contract regardless of engine.
        print("=== DRY RUN ===")
        print(f"Engine: {args.engine}")
        print(f"Model: {model}")
        if args.engine == "opus":
            print(f"Effort: {args.effort or '(CLI default)'}")
            print(f"Timeout: {eff_timeout}s")
        elif args.engine == "codex":
            _ce, _cl = _resolve_codex_effort(args.effort)
            print(f"Effort: {_ce} (codex model_reasoning_effort{' — clamped from ' + args.effort if _cl else ''})")
            # NOTE(review): the live codex path uses (args.timeout or 3600); this
            # line is kept verbatim for parsers of dry-run output — confirm
            # before changing it to show the real value.
            print("Timeout: n/a (codex)")
        else:
            print("Effort: n/a (engine != opus)")
            print("Timeout: n/a (engine != opus)")
        print(f"Total prompt: ~{len(full):,} chars (~{len(full) // 4:,} tokens)")
        print(f"Context: {len(context):,} chars")
        if prior_context:
            print(f"Prior rounds: {len(prior_context):,} chars")
        print("\n--- PROMPT PREVIEW (first 2000 chars) ---\n")
        print(prompt[:2000])
        if len(prompt) > 2000:
            print(f"\n... ({len(prompt) - 2000} more chars)")
        return

    # Run consultation via chosen engine
    if args.engine == "opus":
        response_text = run_opus_consultation_tmux(
            context=context,
            prompt=prompt,
            model=model,
            follow_up=follow_up,
            prior_context=prior_context,
            temperature=args.temperature,
            max_tokens=args.max_tokens,
            effort=args.effort,
            timeout=args.timeout,
        )
    elif args.engine == "opus-api":
        response_text = run_opus_consultation(
            context=context,
            prompt=prompt,
            model=model,
            follow_up=follow_up,
            prior_context=prior_context,
            temperature=args.temperature,
            max_tokens=args.max_tokens,
        )
    elif args.engine == "codex":
        response_text = run_codex_consultation(
            context=context,
            prompt=prompt,
            model=model,
            follow_up=follow_up,
            prior_context=prior_context,
            temperature=args.temperature,
            max_tokens=args.max_tokens,
            effort=args.effort,
            timeout=(args.timeout or 3600),
            cwd=args.codex_cwd,
        )
    else:
        response_text = run_consultation(
            context=context,
            prompt=prompt,
            model=model,
            follow_up=follow_up,
            prior_context=prior_context,
            temperature=args.temperature,
            max_tokens=args.max_tokens,
            grounding=args.grounding,
        )

    # Guard: a response with no substantive body (e.g. an agentic engine emitted
    # only a VERIFICATION footer after burning its turn on file reads) is a FAILED
    # consult — fail loudly instead of saving it as success (2026-06-05 incident).
    if _is_degenerate_response(response_text):
        # `or ""` keeps the diagnostic itself from raising TypeError if an engine
        # handed back None (the guard above already treats None as degenerate).
        print(
            "FATAL: consult response has no substantive body "
            f"({len(response_text or '')} chars) — the engine likely emitted only a "
            "VERIFICATION/self-audit block and no answer. NOT saving as success. "
            "Re-run with a prompt that forbids re-grounding and demands the design "
            "as output.",
            file=sys.stderr,
        )
        sys.exit(2)

    # Save response (codex outputs carry effort provenance as YAML frontmatter so
    # effort can be attributed in A/B comparisons; other engines unchanged)
    output_path = Path(args.output)
    writes_to_stdout = str(output_path) == "/dev/stdout"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    file_text = response_text
    if args.engine == "codex" and not writes_to_stdout:
        _ce, _ = _resolve_codex_effort(args.effort)
        file_text = (
            "---\n"
            "engine: codex\n"
            f"model: {model or 'plan-default'}\n"
            f"reasoning_effort: {_ce}\n"
            f"generated: {date.today().isoformat()}\n"
            "---\n\n"
        ) + response_text
    output_path.write_text(file_text)
    print(f"Saved: {output_path}", file=sys.stderr)

    # Archive to Google Drive (dry-run already returned above, so only the
    # opt-out flag and the stdout pseudo-file need checking here)
    if not args.no_drive and not writes_to_stdout:
        # Determine topic from --topic flag, output path, or fallback
        topic = args.topic
        if not topic:
            # Try to infer from output path: consultations/{topic}/gemini_round_1.md
            out_parts = output_path.resolve().parts
            if "consultations" in out_parts:
                idx = out_parts.index("consultations")
                # The component after "consultations" must be a directory, not
                # the output file itself.
                if idx + 1 < len(out_parts) - 1:
                    topic = out_parts[idx + 1]
            if not topic:
                topic = output_path.stem

        drive_files = {}

        # Include context
        if args.context:
            drive_files["context.md"] = Path(args.context)
        elif context:
            drive_files["context.md"] = context

        # Include prompt
        if args.prompt:
            drive_files[Path(args.prompt).name] = Path(args.prompt)
        elif prompt:
            drive_files["prompt.md"] = prompt

        # Include follow-up
        if args.follow_up:
            drive_files[Path(args.follow_up).name] = Path(args.follow_up)

        # Include prior rounds
        if args.prior_dir:
            prior_path = Path(args.prior_dir)
            for f in sorted(prior_path.glob("*_round_*.md")):
                drive_files[f.name] = f

        # Include the response
        drive_files[output_path.name] = output_path

        archive_to_drive(topic, drive_files)

    # Also print to stdout — unless the output file IS stdout, in which case the
    # write above already emitted the response and echoing would duplicate it.
    if not writes_to_stdout:
        print(response_text)

# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()