"""
Documentation sync checks (43-49).

Checks:
  43. claude_md_scripts_table — CLAUDE.md Python Scripts table lists files that don't exist (or missing real files)
  44. claude_md_folder_structure — Folder structure diagram diverged from actual directory tree
  45. cost_figures_sync — Cost figures in CLAUDE.md/WORKFLOW_GUIDE not matching pricing_rates.json
  46. workflow_guide_tools — Tools table in WORKFLOW_GUIDE missing entries or listing deleted tools
  47. command_ref_paths — Skill/agent file paths in COMMAND REFERENCE table that don't exist on disk
  48. validate_docs_issues — Known validate_docs.py issues still present (renamed concepts, rule wording)
  49. built_vs_planned — "What's Built" table claims BUILT for files that don't exist
"""

import os
import re

from . import register_check, register_section


def check_claude_md_scripts_table(base, _discovered):
    """Verify CLAUDE.md Python Scripts table matches actual files on disk.

    Two directions are checked:

    * every ``| `name.py` | `location` |`` row found in CLAUDE.md must
      resolve to ``<base>/<location>/<name.py>``;
    * every ``*.py`` file (not starting with ``_``) directly inside
      ``tools/``, ``.claude/hooks/`` and ``lib/`` should be named by a row.

    Args:
        base: Project root directory that contains CLAUDE.md.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    results = {"pass": [], "fail": [], "warn": []}

    claude_md_path = os.path.join(base, "CLAUDE.md")
    if not os.path.exists(claude_md_path):
        results["fail"].append("CLAUDE.md not found")
        return results

    # Decode explicitly as UTF-8: the platform default encoding is not UTF-8
    # everywhere, and the doc may contain non-ASCII characters.
    with open(claude_md_path, encoding="utf-8") as f:
        content = f.read()

    # Row shape: | `script_name.py` | `location` | purpose |
    table_pattern = re.compile(
        r'\|\s*`([^`]+\.py)`\s*\|\s*`([^`]+)`\s*\|'
    )

    # Keep (script, location) pairs instead of a name -> location mapping so
    # two rows naming the same file in different directories are both
    # verified (a mapping would silently keep only the last row).
    # dict.fromkeys() drops exact duplicate rows while preserving order.
    documented_scripts = list(dict.fromkeys(
        (match.group(1), match.group(2).strip("/"))
        for match in table_pattern.finditer(content)
    ))

    if not documented_scripts:
        results["warn"].append("No Python Scripts table found in CLAUDE.md")
        return results

    # Documented rows whose file is absent on disk.
    missing_on_disk = [
        (script, location)
        for script, location in documented_scripts
        if not os.path.exists(os.path.join(base, location, script))
    ]

    # Real scripts that no row mentions. Names starting with "_" (which
    # includes __init__.py) are not expected to be documented.
    scan_dirs = [
        os.path.join(base, "tools"),
        os.path.join(base, ".claude", "hooks"),
        os.path.join(base, "lib"),
    ]
    actual_scripts = set()
    for scan_dir in scan_dirs:
        if os.path.isdir(scan_dir):
            actual_scripts.update(
                name for name in os.listdir(scan_dir)
                if name.endswith(".py") and not name.startswith("_")
            )

    documented_names = {script for script, _ in documented_scripts}
    undocumented = actual_scripts - documented_names

    for script, loc in missing_on_disk:
        results["warn"].append(
            f"CLAUDE.md lists `{script}` at `{loc}` but file doesn't exist"
        )

    # More than 10 undocumented scripts are not listed individually; this
    # threshold is kept from the original implementation.
    if undocumented and len(undocumented) <= 10:
        for script in sorted(undocumented):
            results["warn"].append(
                f"CLAUDE.md Python Scripts table missing: {script}"
            )

    if not missing_on_disk:
        results["pass"].append(
            f"All {len(documented_scripts)} documented scripts exist on disk"
        )

    return results


def check_claude_md_folder_structure(base, _discovered):
    """Verify CLAUDE.md folder structure diagram matches actual directories.

    Only the ``## FOLDER STRUCTURE`` section (up to the next ``## `` heading)
    is scanned. Of the directory names found there, only those in a fixed
    list of key directories are checked on disk, and each is looked for
    under ``<base>``, ``<base>/tools``, ``<base>/editors`` and
    ``<base>/.claude``.

    Args:
        base: Project root directory that contains CLAUDE.md.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    results = {"pass": [], "fail": [], "warn": []}

    claude_md_path = os.path.join(base, "CLAUDE.md")
    if not os.path.exists(claude_md_path):
        results["fail"].append("CLAUDE.md not found")
        return results

    # The diagram uses box-drawing characters, so decode explicitly as UTF-8
    # instead of relying on the platform default encoding.
    with open(claude_md_path, encoding="utf-8") as f:
        content = f.read()

    struct_start = content.find("## FOLDER STRUCTURE")
    if struct_start == -1:
        results["warn"].append("No FOLDER STRUCTURE section in CLAUDE.md")
        return results

    # The section runs to the next level-2 heading, or to end of file.
    struct_end = content.find("\n## ", struct_start + 1)
    if struct_end == -1:
        struct_end = len(content)
    struct_section = content[struct_start:struct_end]

    # Tree lines look like "├── dirname/"; capture the name before the slash.
    dir_pattern = re.compile(r'[├└│─\s]+(\w[\w.-]+/)')
    documented_dirs = {
        match.group(1).rstrip("/")
        for match in dir_pattern.finditer(struct_section)
    }

    # Each name appears once: a duplicate entry would emit the same
    # "directory not found" warning twice.
    KEY_DIRS = ("tools", "_development", "editors", "agents",
                "templates", "skills", "lenses", "lib", "evaluation")
    search_parents = ("", "tools", "editors", ".claude")

    missing = [
        dir_name
        for dir_name in KEY_DIRS
        if dir_name in documented_dirs
        and not any(
            os.path.isdir(os.path.join(base, parent, dir_name))
            for parent in search_parents
        )
    ]

    if missing:
        for d in missing:
            results["warn"].append(
                f"CLAUDE.md folder structure lists '{d}/' but directory not found"
            )
    else:
        results["pass"].append(
            f"CLAUDE.md folder structure: {len(documented_dirs)} directories verified"
        )

    return results


def check_cost_figures_sync(base, _discovered):
    """Verify cost figures in docs match pricing_rates.json.

    Reads ``config/pricing_rates.json``, takes the last entry of its
    ``rate_cards`` list, and looks for that card's
    ``elevenlabs.tts_api.rate`` (formatted as ``$X.XX``) in each doc.
    A doc that contains both ``$0.`` and ``1K char`` but not the expected
    figure gets a "verify" warning.

    Args:
        base: Project root directory.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    import json

    results = {"pass": [], "fail": [], "warn": []}

    rates_path = os.path.join(base, "config", "pricing_rates.json")
    if not os.path.exists(rates_path):
        results["warn"].append("pricing_rates.json not found")
        return results

    try:
        with open(rates_path, encoding="utf-8") as f:
            rates_data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        results["warn"].append("pricing_rates.json cannot be parsed")
        return results

    rate_cards = rates_data.get("rate_cards", [])
    if not rate_cards:
        # Report the empty file rather than returning a result with no
        # messages; check_pricing_staleness warns the same way.
        results["warn"].append("No rate cards in pricing_rates.json")
        return results

    latest = rate_cards[-1]

    # The only rate currently compared against the docs.
    tts_rate = None
    el = latest.get("elevenlabs", {})
    if "tts_api" in el:
        tts_rate = el["tts_api"].get("rate")

    # NOTE(review): check_workflow_guide_tools reads this guide from
    # "_docs/", not "docs/" - confirm which location is correct.
    docs_to_check = [
        "CLAUDE.md",
        "docs/PRODUCTION_PIPELINE_GUIDE.md",
    ]

    if tts_rate:
        # Loop-invariant, so format once (commonly documented as
        # "$0.30/1K chars").
        rate_str = f"${tts_rate:.2f}"

        for doc_rel in docs_to_check:
            full = os.path.join(base, doc_rel)
            if not os.path.exists(full):
                continue

            # Explicit UTF-8 instead of the platform default encoding.
            with open(full, encoding="utf-8") as f:
                doc_content = f.read()

            basename = os.path.basename(doc_rel)

            if rate_str in doc_content:
                results["pass"].append(f"{basename}: ElevenLabs rate {rate_str} matches")
            elif "$0." in doc_content and "1K char" in doc_content:
                # Some per-1K-chars price is present but not the expected one.
                results["warn"].append(
                    f"{basename}: mentions ElevenLabs pricing — "
                    f"verify it matches {rate_str}/1K chars"
                )

    if not results["pass"] and not results["warn"]:
        results["pass"].append("No cost figures to verify in docs")

    return results


def check_workflow_guide_tools(base, _discovered):
    """Verify tools mentioned in WORKFLOW_GUIDE exist on disk.

    Every backticked ``name.py`` in ``_docs/PRODUCTION_PIPELINE_GUIDE.md``
    must exist, by bare filename, directly inside one of ``tools/``,
    ``.claude/hooks/``, ``lib/`` or ``tools/editors/``.

    Args:
        base: Project root directory.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    results = {"pass": [], "fail": [], "warn": []}

    guide_path = os.path.join(base, "_docs", "PRODUCTION_PIPELINE_GUIDE.md")
    if not os.path.exists(guide_path):
        results["warn"].append("PRODUCTION_PIPELINE_GUIDE.md not found")
        return results

    # Explicit UTF-8 instead of the platform default encoding.
    with open(guide_path, encoding="utf-8") as f:
        content = f.read()

    # Backticked Python filenames, e.g. `build_audio.py`.
    script_pattern = re.compile(r'`(\w+\.py)`')
    referenced_scripts = {
        match.group(1) for match in script_pattern.finditer(content)
    }

    # Directories searched (non-recursively) for the referenced filenames.
    scan_dirs = [
        os.path.join(base, "tools"),
        os.path.join(base, ".claude", "hooks"),
        os.path.join(base, "lib"),
        os.path.join(base, "tools", "editors"),
    ]
    existing_scripts = set()
    for scan_dir in scan_dirs:
        if os.path.isdir(scan_dir):
            existing_scripts.update(
                name for name in os.listdir(scan_dir) if name.endswith(".py")
            )

    # References that are not expected to live in the scanned directories.
    # ("requirements.txt" can never be produced by the .py pattern above;
    # it is retained from the original list and has no effect.)
    KNOWN_EXTERNAL = {
        "setup.py", "generate_from_storyboard.py", "requirements.txt",
    }
    missing = referenced_scripts - existing_scripts - KNOWN_EXTERNAL

    if missing:
        for script in sorted(missing):
            results["warn"].append(
                f"WORKFLOW_GUIDE references `{script}` but file not found in tools/hooks/lib"
            )
    else:
        results["pass"].append(
            f"All {len(referenced_scripts)} script references in WORKFLOW_GUIDE verified"
        )

    return results


def check_command_ref_paths(base, _discovered):
    """Verify file paths in CLAUDE.md COMMAND REFERENCE table exist on disk.

    Only the ``## COMMAND REFERENCE`` section (up to the next ``## ``
    heading) is scanned. A backticked path is checked when it starts with
    ``_engine/``, ``.claude/`` or ``claude/``, with an optional leading
    slash. A ``claude/...`` path is also accepted if it exists as
    ``.claude/...``.

    Args:
        base: Project root directory that contains CLAUDE.md.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    results = {"pass": [], "fail": [], "warn": []}

    claude_md_path = os.path.join(base, "CLAUDE.md")
    if not os.path.exists(claude_md_path):
        results["fail"].append("CLAUDE.md not found")
        return results

    # Explicit UTF-8 instead of the platform default encoding.
    with open(claude_md_path, encoding="utf-8") as f:
        content = f.read()

    ref_start = content.find("## COMMAND REFERENCE")
    if ref_start == -1:
        results["warn"].append("No COMMAND REFERENCE section in CLAUDE.md")
        return results

    # The section runs to the next level-2 heading, or to end of file.
    ref_end = content.find("\n## ", ref_start + 1)
    if ref_end == -1:
        ref_end = len(content)
    ref_section = content[ref_start:ref_end]

    # The dot before "claude" is optional so that the dot-less fallback
    # below can actually be reached; the previous pattern required the dot,
    # which made that fallback dead code.
    path_pattern = re.compile(r'`(/?(?:_engine|\.?claude)/[^`]+)`')

    paths_checked = 0
    missing_paths = []

    for match in path_pattern.finditer(ref_section):
        path_ref = match.group(1).lstrip("/")
        paths_checked += 1

        if os.path.exists(os.path.join(base, path_ref)):
            continue

        # A path written as "claude/..." may refer to the ".claude/" folder.
        if path_ref.startswith("claude/") and os.path.exists(
                os.path.join(base, "." + path_ref)):
            continue

        missing_paths.append(path_ref)

    for p in missing_paths:
        results["warn"].append(
            f"CLAUDE.md COMMAND REFERENCE: path `{p}` doesn't exist"
        )
    if paths_checked > 0:
        valid = paths_checked - len(missing_paths)
        results["pass"].append(
            f"COMMAND REFERENCE: {valid}/{paths_checked} file paths verified"
        )

    return results


def check_validate_docs_issues(base, _discovered):
    """Check for known validate_docs.py issues still present.

    Two things are checked:

    * a fixed set of docs under ``base`` is scanned (case-insensitively) for
      a standalone "the lens" that is not followed by " package";
    * ``tools/WORKFLOW_SPEC.md``, if present, must contain one of the
      accepted phrasings of the consecutive-pattern rule.

    Files that do not exist are skipped without a message.

    Args:
        base: Project root directory.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    results = {"pass": [], "fail": [], "warn": []}

    # "the lens" as a standalone concept should be "lens package"; the
    # lookahead excludes the correct term. Compiled once and shared.
    stale_lens = (
        re.compile(r"\bthe lens\b(?! package)", re.IGNORECASE),
        "stale 'the lens' reference (should be 'lens package')",
    )
    # NOTE(review): WORKFLOW_SPEC.md is looked up directly under `base`
    # here, but under `base/tools/` for the wording check below - confirm
    # which location is intended.
    files_to_check = {
        "WORKFLOW_SPEC.md": [stale_lens],
        "appendix_a_cliffhangers_hooks.md": [stale_lens],
        "appendix_e_flux2_protocols.md": [stale_lens],
    }

    for rel, checks in files_to_check.items():
        full = os.path.join(base, rel)
        if not os.path.exists(full):
            continue

        # Explicit UTF-8 instead of the platform default encoding.
        with open(full, encoding="utf-8") as f:
            content = f.read()

        basename = os.path.basename(rel)
        file_issues = []
        for pattern, desc in checks:
            hits = len(pattern.findall(content))
            if hits:
                file_issues.append(f"{desc} ({hits} occurrences)")

        if file_issues:
            for issue in file_issues:
                results["warn"].append(f"{basename}: {issue}")
        else:
            results["pass"].append(f"{basename}: no known validate_docs issues")

    # Consecutive-rule wording in the spec.
    spec_path = os.path.join(base, "tools", "WORKFLOW_SPEC.md")
    if os.path.exists(spec_path):
        with open(spec_path, encoding="utf-8") as f:
            spec_lower = f.read().lower()

        # "4+ consecutive" also covers the longer "no 4+ consecutive".
        accepted_wordings = ("max 3 consecutive", "4+ is violation", "4+ consecutive")
        if any(wording in spec_lower for wording in accepted_wordings):
            results["pass"].append("WORKFLOW_SPEC: consecutive rule wording present")
        else:
            results["warn"].append(
                "WORKFLOW_SPEC: consecutive pattern rule wording not found"
            )

    return results


def check_built_vs_planned(base, _discovered):
    """Verify 'What's Built' tables don't claim BUILT for files that don't exist.

    README.md and VISUAL_PIPELINE_STATUS.md are scanned for table rows whose
    first matched cell is a ``.py``/``.html``/``.json``/``.md`` filename and
    whose next cell contains BUILT, DONE, COMPLETE(D) or a check mark. Each
    such file is then looked for under ``tools/``, ``editors/``, ``lib/``,
    ``.claude/hooks/``, ``templates/`` and ``base`` itself.

    Status keywords are matched as whole words and not when directly
    preceded by "not ", so cells such as "INCOMPLETE", "abandoned" or
    "NOT BUILT" are not treated as build claims.

    Args:
        base: Project root directory.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    results = {"pass": [], "fail": [], "warn": []}

    docs_to_check = [
        "README.md",
        "VISUAL_PIPELINE_STATUS.md",
    ]
    search_prefixes = ["tools/", "editors/", "lib/",
                       ".claude/hooks/", "templates/", ""]

    # | `file.ext` | <status cell containing a build keyword> |
    # Compiled once, outside the per-document loop.
    built_pattern = re.compile(
        r'\|\s*`?([^|`]+\.(?:py|html|json|md))`?\s*\|[^|]*'
        r'(?:(?<!not )\b(?:BUILT|DONE|COMPLETED?)\b|✅)[^|]*\|',
        re.IGNORECASE,
    )

    for doc_rel in docs_to_check:
        full = os.path.join(base, doc_rel)
        if not os.path.exists(full):
            continue

        # Status tables may contain "✅", so decode explicitly as UTF-8
        # instead of relying on the platform default encoding.
        with open(full, encoding="utf-8") as f:
            content = f.read()

        basename = os.path.basename(doc_rel)

        claims = [
            match.group(1).strip().strip("`")
            for match in built_pattern.finditer(content)
        ]

        # A claim is satisfied if the file exists under any search prefix.
        missing = [
            filename
            for filename in claims
            if not any(
                os.path.exists(os.path.join(base, prefix, filename))
                for prefix in search_prefixes
            )
        ]

        if missing:
            for filename in missing:
                results["warn"].append(
                    f"{basename}: claims '{filename}' is BUILT but file not found"
                )
        elif claims:
            results["pass"].append(
                f"{basename}: all {len(claims)} BUILT claims verified"
            )

    if not results["pass"] and not results["warn"]:
        results["pass"].append("No BUILT claims to verify")

    return results


def check_pricing_staleness(base, _discovered):
    """Warn if pricing_rates.json effective_date is more than 30 days old.

    Reads ``config/pricing_rates.json``, takes the last entry of its
    ``rate_cards`` list and compares that card's ``effective_date``
    (``YYYY-MM-DD``) with the current local time.

    Args:
        base: Project root directory.
        _discovered: Unused by this check.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` lists of messages.
    """
    import json
    from datetime import datetime

    results = {"pass": [], "fail": [], "warn": []}

    rates_path = os.path.join(base, "config", "pricing_rates.json")
    if not os.path.exists(rates_path):
        results["warn"].append("pricing_rates.json not found")
        return results

    try:
        with open(rates_path, encoding="utf-8") as f:
            data = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        results["warn"].append("pricing_rates.json cannot be parsed")
        return results

    rate_cards = data.get("rate_cards", [])
    if not rate_cards:
        results["warn"].append("No rate cards in pricing_rates.json")
        return results

    # The last card in the list is treated as the most recent one.
    latest = rate_cards[-1]
    effective = latest.get("effective_date")
    if not effective:
        results["warn"].append("No effective_date in latest rate card")
        return results

    try:
        date_obj = datetime.strptime(effective, "%Y-%m-%d")
    except (ValueError, TypeError):
        # TypeError: the JSON value is not a string (e.g. a bare number).
        results["warn"].append(f"Cannot parse effective_date: {effective}")
        return results

    days_old = (datetime.now() - date_obj).days

    if days_old > 30:
        results["warn"].append(
            f"Pricing stale: effective_date {effective} is {days_old} days old "
            f"— verify rates against current API pricing"
        )
    else:
        results["pass"].append(
            f"Pricing current: {effective} ({days_old} days old)"
        )

    return results


# ═══════════════════════════════════════════════════════════════
# REGISTRATION
# ═══════════════════════════════════════════════════════════════

# One row per check: (check id, display title, implementation).
# Row order is the order passed to register_check and to register_section.
_DOCSYNC_CHECKS = (
    ("scripts_table", "CLAUDE.md Scripts Table", check_claude_md_scripts_table),
    ("folder_structure", "CLAUDE.md Folder Structure", check_claude_md_folder_structure),
    ("cost_figures", "Cost Figures Sync", check_cost_figures_sync),
    ("guide_tools", "Workflow Guide Tools", check_workflow_guide_tools),
    ("command_ref_paths", "Command Reference Paths", check_command_ref_paths),
    ("validate_docs_issues", "Validate Docs Issues", check_validate_docs_issues),
    ("built_vs_planned", "Built vs Planned", check_built_vs_planned),
    ("pricing_staleness", "Pricing Staleness", check_pricing_staleness),
)

# Every check in this module is registered with the "docsync" section name.
for _check_id, _title, _check_func in _DOCSYNC_CHECKS:
    register_check(_check_id, _title, _check_func, "docsync")

# The section lists the same check ids, in the same order.
register_section("docsync", [_row[0] for _row in _DOCSYNC_CHECKS])
