"""
Semantic checks (7-18): stale values, patterns, examples, counting.

Checks:
  7.  Stale Value Detection
  8.  Shared Counting Consistency
  9.  Hook Safety Patterns
  10. Template Value Consistency
  11. Agent Example Plausibility
  12. Hardcoded Constants Detection
  13. Inventory Count Verification
  14. Exit Code Convention
  15. JSON Syntax Validation
  16. Python Syntax Validation
  17. Placeholder Detection
  18. Table Duplication Detection
"""

import ast
import fnmatch
import json
import os
import re

from . import register_check, register_section

# ═══════════════════════════════════════════════════════════════
# STALE VALUE PATTERNS
# ═══════════════════════════════════════════════════════════════

# Table consumed by check_stale_values(). Each entry has:
#   "pattern"     - regex source; compiled there with re.IGNORECASE and
#                   searched against the full text of each scanned file.
#   "description" - human-readable label used in the pass/fail messages.
#   "scan"        - fnmatch-style globs matched against the discovered
#                   relative paths to select which files are searched.
STALE_VALUE_PATTERNS = [
    {
        "pattern": r"250[\s-]*300\s*words",
        "description": "Pre-V12 word count (250-300)",
        "scan": ["templates/*.json", "agents/*.md"],
    },
    {
        "pattern": r"(?:hard.cap|max(?:imum)?)\D{0,10}315",
        "description": "Pre-V12 hard cap (315)",
        "scan": ["templates/*.json", "agents/*.md"],
    },
    {
        # Matches an actual count of 200-399 over a limit of 300-400.
        "pattern": r"Word count:\s*(?:2\d{2}|3\d{2})/(?:3\d{2}|400)",
        "description": "Stale example word count (under 400)",
        "scan": ["agents/*.md"],
    },
    {
        "pattern": r"(?:men|male|audience|demo)\D{0,20}16[\s-]+35",
        "description": "Wrong demographic (16-35 instead of 18-35)",
        "scan": ["appendix_d_ai_video.md", "agents/*.md",
                 "skills/*/SKILL.md", "CLAUDE.md"],
    },
    {
        "pattern": r'"default":\s*"16:9"',
        "description": "Wrong default aspect ratio (16:9 instead of 9:16)",
        "scan": ["templates/storyboard_schema.json"],
    },
    {
        # The (?<!\d) lookbehind keeps e.g. "16-8 exchanges" from matching.
        "pattern": r"(?<!\d)6[\s-]+8\s*(?:exchanges|dialogue)",
        "description": "Phantom exchange minimum (6-8 instead of Max 8)",
        "scan": ["skills/format_v12/SKILL.md", "agents/*.md"],
    },
]


def check_stale_values(base, discovered):
    """Detect pre-V12 and outdated values in agent examples and templates.

    Every entry of ``STALE_VALUE_PATTERNS`` is compiled case-insensitively
    and searched for in each discovered file whose relative path matches one
    of the entry's ``scan`` globs.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths to consider.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists: one
        ``fail`` per (pattern, file) hit, or a single ``pass`` for a pattern
        that was found nowhere. ``warn`` is never populated here.
    """
    results = {"pass": [], "fail": [], "warn": []}
    candidates = sorted(discovered)

    for entry in STALE_VALUE_PATTERNS:
        pattern = re.compile(entry["pattern"], re.IGNORECASE)
        desc = entry["description"]
        found_in = []

        for glob_pat in entry["scan"]:
            for rel in candidates:
                if not fnmatch.fnmatch(rel, glob_pat):
                    continue
                # A path matched by two globs of the same entry must only be
                # reported once.
                if rel in found_in:
                    continue
                try:
                    # Decode explicitly as UTF-8 and replace undecodable
                    # bytes: the locale default can raise
                    # UnicodeDecodeError, which is not an OSError and would
                    # abort the whole check run.
                    with open(os.path.join(base, rel), encoding="utf-8",
                              errors="replace") as f:
                        content = f.read()
                except OSError:
                    # Missing or unreadable files are skipped silently.
                    continue
                if pattern.search(content):
                    found_in.append(rel)

        if found_in:
            for rel in found_in:
                results["fail"].append(f"{desc} found in {rel}")
        else:
            results["pass"].append(f"No stale: {desc}")

    return results


def check_shared_counting(base, discovered):
    """Verify Python scripts use shared counting from engine_constants.

    A fixed list of scripts is inspected; each must contain either
    ``from engine_constants import`` or ``import engine_constants``.

    Args:
        base: Directory that the script paths are joined onto.
        discovered: Unused here; the scripts come from a fixed list.
            (Presumably kept so all checks share one signature.)

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists.
        Missing or unreadable scripts produce a ``warn``; scripts without
        the import produce a ``fail``.
    """
    results = {"pass": [], "fail": [], "warn": []}

    COUNTING_SCRIPTS = [
        "tools/episode_metrics.py",
        "tools/generate_batch_summary.py",
        "tools/analyze_batch.py",
        ".claude/hooks/validate_batch.py",
        ".claude/hooks/baseline_comparison.py",
        ".claude/hooks/dramatic_qc_gate.py",
    ]

    SHARED_IMPORT_PATTERNS = [
        re.compile(r"from\s+engine_constants\s+import"),
        re.compile(r"import\s+engine_constants"),
    ]

    for rel in COUNTING_SCRIPTS:
        full = os.path.join(base, rel)
        if not os.path.exists(full):
            results["warn"].append(f"{rel} not found")
            continue

        try:
            # Explicit UTF-8 with replacement: relying on the locale default
            # can raise UnicodeDecodeError (not an OSError) on a non-ASCII
            # comment and crash the check instead of reporting a result.
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            results["warn"].append(f"Cannot read {rel}")
            continue

        basename = os.path.basename(rel)
        if any(p.search(content) for p in SHARED_IMPORT_PATTERNS):
            results["pass"].append(f"{basename}: imports engine_constants")
        else:
            results["fail"].append(
                f"{basename}: does NOT import engine_constants — "
                f"counting may be inconsistent"
            )

    return results


def check_hook_safety(base, discovered):
    """Verify hook scripts don't have dangerous patterns.

    Only discovered paths under ``.claude/hooks/`` ending in ``.py`` are
    inspected. Two line-based heuristics are applied:

    * a bare ``except:`` clause is reported as ``fail``;
    * a line calling ``subprocess.run``/``call``/``Popen`` that neither
      assigns the result nor contains ``check=True`` is reported as ``warn``.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists; a
        hook with neither issue gets one ``pass``. Unreadable hooks are
        skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    BARE_EXCEPT = re.compile(r"^\s*except\s*:", re.MULTILINE)
    SUBPROCESS_LINE = re.compile(
        r"^.*subprocess\.(?:run|call|Popen)\(.*$", re.MULTILINE
    )
    ASSIGNED_RESULT = re.compile(r"\w+\s*=\s*subprocess\.")

    def has_unchecked_subprocess(content):
        # Only the line containing the call is examined, so a ``check=True``
        # placed on a continuation line is not seen by this heuristic.
        for call_line in SUBPROCESS_LINE.findall(content):
            if ASSIGNED_RESULT.search(call_line):
                continue
            if "check=True" in call_line:
                continue
            return True
        return False

    for rel in sorted(discovered):
        if not rel.startswith(".claude/hooks/") or not rel.endswith(".py"):
            continue

        full = os.path.join(base, rel)
        try:
            # Explicit UTF-8 with replacement so a stray byte or a non-UTF-8
            # locale cannot raise UnicodeDecodeError (which the OSError
            # handler below would not catch).
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        basename = os.path.basename(rel)
        clean = True

        if BARE_EXCEPT.search(content):
            clean = False
            results["fail"].append(
                f"{basename}: bare except: clause (swallows all exceptions)"
            )

        if has_unchecked_subprocess(content):
            clean = False
            results["warn"].append(
                f"{basename}: subprocess call without return code check"
            )

        if clean:
            results["pass"].append(f"{basename}: safe patterns")

    return results


# ═══════════════════════════════════════════════════════════════
# TEMPLATE VALUE CHECKS
# ═══════════════════════════════════════════════════════════════

# Table consumed by check_template_values(). Each entry names one file
# (relative to the base directory) and a list of (regex, description) pairs.
# Every regex is searched case-insensitively in the file's text; a regex that
# is NOT found is reported as a failure. The patterns are compiled without
# re.DOTALL, so ".*" only spans a single line.
TEMPLATE_VALUE_CHECKS = [
    {
        "file": "templates/state_template.json",
        "checks": [
            ("word_count_target.*450.*500", "word_count_target should be 450-500"),
            ("hard_cap.*500", "hard_cap should reference 500"),
        ],
    },
    {
        "file": "templates/storyboard_schema.json",
        "checks": [
            ('"9:16"', "default aspect ratio should be 9:16"),
        ],
    },
]

# Names that must each appear (plain, case-sensitive substring test) in every
# file listed in ANCHOR_FILES.
ANCHOR_TYPES = ["CUB", "GHOST", "MIRROR", "SKEPTIC", "TETHER", "WITNESS", "FOIL", "COST"]

# Template files checked for the presence of all ANCHOR_TYPES.
ANCHOR_FILES = [
    "templates/series_bible_template.md",
    "templates/episode_arc_template.md",
]


def check_template_values(base, discovered):
    """Verify template files contain the expected values and anchor types.

    Two passes are made:

    1. For each ``TEMPLATE_VALUE_CHECKS`` entry, every regex must be found
       (case-insensitively) in the named file.
    2. Every file in ``ANCHOR_FILES`` must mention every ``ANCHOR_TYPES``
       name as a plain substring.

    Note that the expectations are hard-coded in those tables; CONSTANTS.md
    itself is not read by this function.

    Args:
        base: Directory that the template paths are joined onto.
        discovered: Unused here; the files come from the module-level tables.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists.
        Missing or unreadable files produce a ``warn``.
    """
    results = {"pass": [], "fail": [], "warn": []}

    for entry in TEMPLATE_VALUE_CHECKS:
        rel = entry["file"]
        full = os.path.join(base, rel)
        basename = os.path.basename(rel)

        if not os.path.exists(full):
            results["warn"].append(f"{basename}: file not found")
            continue

        try:
            # Explicit UTF-8 with replacement: the locale default may raise
            # UnicodeDecodeError, which is not covered by the OSError handler.
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            results["warn"].append(f"{basename}: cannot read")
            continue

        for pattern_str, desc in entry["checks"]:
            if re.search(pattern_str, content, re.IGNORECASE):
                results["pass"].append(f"{basename}: {desc}")
            else:
                results["fail"].append(f"{basename}: {desc} — NOT FOUND")

    for rel in ANCHOR_FILES:
        full = os.path.join(base, rel)
        basename = os.path.basename(rel)
        if not os.path.exists(full):
            results["warn"].append(f"{basename}: not found (anchor check skipped)")
            continue

        try:
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            # Report the skipped check instead of dropping it silently, in
            # line with how the first loop handles unreadable files.
            results["warn"].append(f"{basename}: cannot read (anchor check skipped)")
            continue

        missing = [t for t in ANCHOR_TYPES if t not in content]
        if missing:
            results["fail"].append(
                f"{basename}: missing anchor types: {', '.join(missing)}"
            )
        else:
            # Derive the count from the table so the message cannot go stale.
            results["pass"].append(
                f"{basename}: all {len(ANCHOR_TYPES)} anchor types present"
            )

    return results


def check_agent_examples(base, discovered):
    """Verify agent .md example outputs contain plausible values.

    Only discovered paths under ``agents/`` ending in ``.md`` are scanned for
    two kinds of example lines:

    * ``Word count: A/L`` (or ``Word:``/``Words:``) - flagged when the limit
      ``L`` is below 400;
    * ``Episode total: B → A`` - flagged when either number is below 400.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists: one
        ``fail`` per stale example, or one ``pass`` per clean file.
        Unreadable files are skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    WORD_COUNT_EXAMPLE = re.compile(
        r"(?:Word count|Words?):\s*(\d+)\s*/\s*(\d+)"
    )
    EPISODE_TOTAL = re.compile(
        r"Episode total:\s*(\d+)\s*→\s*(\d+)"
    )

    for rel in sorted(discovered):
        if not rel.startswith("agents/") or not rel.endswith(".md"):
            continue

        full = os.path.join(base, rel)
        try:
            # The arrow in EPISODE_TOTAL only matches if the file is decoded
            # as UTF-8; the locale default could either mis-decode it or
            # raise UnicodeDecodeError (not caught by the handler below).
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        basename = os.path.basename(rel)
        file_issues = []

        for match in WORD_COUNT_EXAMPLE.finditer(content):
            actual, limit = int(match.group(1)), int(match.group(2))
            if limit < 400:
                file_issues.append(
                    f"stale word count example: {actual}/{limit} "
                    f"(should be in 450-500 range)"
                )

        for match in EPISODE_TOTAL.finditer(content):
            before, after = int(match.group(1)), int(match.group(2))
            if after < 400 or before < 400:
                file_issues.append(
                    f"stale episode total example: {before}→{after} "
                    f"(should be in 450-500 range)"
                )

        if file_issues:
            results["fail"].extend(f"{basename}: {issue}" for issue in file_issues)
        else:
            results["pass"].append(f"{basename}: examples plausible")

    return results


def check_hardcoded_constants(base, discovered):
    """Scan .md files for numeric values matching CONSTANTS.md patterns.

    Every discovered ``.md`` file except ``CONSTANTS.md`` is scanned line by
    line. A line matching one of the known constant patterns is reported
    unless the text ``CONSTANTS.md`` appears within three lines before or
    after it.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists: one
        ``warn`` per (line, pattern) hit, or a single ``pass`` when nothing
        was flagged. Unreadable files are skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    # Compiled once up front instead of once per line per file.
    CONSTANT_PATTERNS = [
        (re.compile(pattern_str, re.IGNORECASE), desc)
        for pattern_str, desc in [
            (r"450[\s-]*500", "word count range 450-500"),
            (r"40%|≤\s*40", "dialogue max 40%"),
            (r"70[\s-]*85%", "hook/cliffhanger ratios 70-85%"),
            (r"[Mm]ax\s*3\s*consecutive", "pattern limit max 3 consecutive"),
            (r"18[\s-]*24", "storyboard target 18-24 shots"),
            (r"50mm.*f/2\.0", "primary lens 50mm f/2.0"),
            (r"85mm.*f/1\.4", "close-up lens 85mm f/1.4"),
            (r"24mm.*f/8", "wide lens 24mm f/8"),
        ]
    ]

    for rel in sorted(discovered):
        if not rel.endswith(".md"):
            continue
        if rel == "CONSTANTS.md":
            continue

        full = os.path.join(base, rel)
        try:
            # Explicit UTF-8 (needed for the "≤" pattern) with replacement so
            # an undecodable byte cannot raise UnicodeDecodeError, which the
            # OSError handler would not catch.
            with open(full, encoding="utf-8", errors="replace") as f:
                lines = f.readlines()
        except OSError:
            continue

        for line_num, line in enumerate(lines):
            for pattern, desc in CONSTANT_PATTERNS:
                if not pattern.search(line):
                    continue
                # Context window: three lines either side of the hit.
                start = max(0, line_num - 3)
                end = min(len(lines), line_num + 4)
                context = "".join(lines[start:end])
                if "CONSTANTS.md" not in context:
                    results["warn"].append(
                        f"{rel}:{line_num+1}: {desc} without CONSTANTS.md reference"
                    )

    if not results["warn"]:
        results["pass"].append("No undocumented hardcoded constants found")

    return results


def check_inventory_counts(base, discovered):
    """Auto-count skills, tools, hooks, agents, lenses, rubrics.

    Skills are counted on disk (sub-directories of ``<base>/.claude/skills``
    containing a ``SKILL.md``); the other categories are counted from the
    discovered paths by prefix and extension. Every discovered ``.md`` file
    is then scanned for phrases such as ``"12 agents"`` and each claimed
    number that differs from the actual count is reported.

    Args:
        base: Project root directory.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists: one
        ``warn`` per mismatching claim, plus always one ``pass`` summarising
        the actual counts. Unreadable files are skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    counts = {
        "skills": 0, "tools": 0, "hooks": 0,
        "agents": 0, "lenses": 0, "rubrics": 0,
    }

    skills_dir = os.path.join(base, ".claude", "skills")
    if os.path.isdir(skills_dir):
        for entry in os.listdir(skills_dir):
            if os.path.exists(os.path.join(skills_dir, entry, "SKILL.md")):
                counts["skills"] += 1

    for rel in discovered:
        if rel.startswith("tools/") and rel.endswith(".py"):
            counts["tools"] += 1
        elif rel.startswith(".claude/hooks/") and rel.endswith(".py"):
            counts["hooks"] += 1
        elif rel.startswith("agents/") and rel.endswith(".md"):
            counts["agents"] += 1
        elif rel.startswith("lenses/") and rel.endswith(".md"):
            counts["lenses"] += 1
        elif rel.startswith("evaluation/rubrics/") and rel.endswith(".md"):
            counts["rubrics"] += 1

    # The number must start a token and the keyword must end one. Without
    # these boundaries "V12 agents" is read as a claim of 12 agents and
    # "4 agentic flows" as a claim of 4 agents.
    COUNT_PATTERNS = [
        (re.compile(r"(?<![\w.])(\d+)\s*" + words + r"\b", re.IGNORECASE), key)
        for words, key in [
            (r"skills", "skills"),
            (r"(?:Python|scripts?|tools?)", "tools"),
            (r"hooks?", "hooks"),
            (r"agents?", "agents"),
            (r"lenses?", "lenses"),
            (r"rubrics?", "rubrics"),
        ]
    ]

    for rel in sorted(discovered):
        if not rel.endswith(".md"):
            continue

        full = os.path.join(base, rel)
        try:
            # Explicit UTF-8 with replacement so an undecodable byte cannot
            # raise UnicodeDecodeError, which the OSError handler would miss.
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        for pattern, key in COUNT_PATTERNS:
            actual = counts[key]
            for match in pattern.finditer(content):
                claimed = int(match.group(1))
                if claimed != actual:
                    results["warn"].append(
                        f"{rel}: claims {claimed} {key} but actual count is {actual}"
                    )

    results["pass"].append(
        f"Inventory: {counts['skills']} skills, {counts['tools']} tools, "
        f"{counts['hooks']} hooks, {counts['agents']} agents, "
        f"{counts['lenses']} lenses, {counts['rubrics']} rubrics"
    )

    return results


def check_exit_codes(base, discovered):
    """Flag literal ``sys.exit(N)`` calls whose N is not 0, 1 or 2.

    Only discovered ``.py`` files under ``tools/`` or ``.claude/hooks/`` are
    scanned, and only calls with an integer literal argument are recognised.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists: one
        ``fail`` per file listing its non-standard codes, otherwise one
        ``pass`` per file. Unreadable files are skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    # Accept an optional minus sign and inner whitespace: with a bare ``\d+``
    # a call such as ``sys.exit(-1)`` was never matched and so passed.
    EXIT_PATTERN = re.compile(r"sys\.exit\(\s*(-?\d+)\s*\)")
    ALLOWED = {0, 1, 2}

    for rel in sorted(discovered):
        if not rel.startswith(("tools/", ".claude/hooks/")):
            continue
        if not rel.endswith(".py"):
            continue

        full = os.path.join(base, rel)
        try:
            # Explicit UTF-8 with replacement so an undecodable byte cannot
            # raise UnicodeDecodeError, which the OSError handler would miss.
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        basename = os.path.basename(rel)
        bad_exits = [
            code
            for code in (int(m.group(1)) for m in EXIT_PATTERN.finditer(content))
            if code not in ALLOWED
        ]

        if bad_exits:
            results["fail"].append(
                f"{basename}: non-standard exit codes {bad_exits} "
                f"(should be 0=PASS, 1=FAIL, 2=WARNINGS)"
            )
        else:
            results["pass"].append(f"{basename}: exit codes conform to 0/1/2")

    return results


def check_json_syntax(base, discovered):
    """Parse selected .json files and report any that are not valid JSON.

    A discovered ``.json`` path is checked when it starts with ``templates/``
    or ``evaluation/``, or contains ``/state/`` anywhere in the path.

    NOTE(review): a top-level ``state/...`` path contains no ``/state/`` and
    is therefore not checked - confirm whether that is intended.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists:
        ``pass`` for files that parse, ``fail`` for parse or decoding errors,
        ``warn`` for files that cannot be opened or read.
    """
    results = {"pass": [], "fail": [], "warn": []}

    for rel in sorted(discovered):
        if not rel.endswith(".json"):
            continue
        if not (rel.startswith(("templates/", "evaluation/")) or "/state/" in rel):
            continue

        full = os.path.join(base, rel)
        try:
            # Decode as UTF-8 explicitly rather than with the locale default.
            with open(full, encoding="utf-8") as f:
                json.load(f)
        except json.JSONDecodeError as e:
            results["fail"].append(f"{rel}: JSON parse error: {e}")
        except UnicodeDecodeError as e:
            # Not an OSError or JSONDecodeError: without this handler one
            # badly encoded file aborts the entire check run.
            results["fail"].append(f"{rel}: not valid UTF-8: {e}")
        except OSError:
            results["warn"].append(f"{rel}: cannot read")
        else:
            results["pass"].append(f"{rel}: valid JSON")

    return results


def check_python_syntax(base, discovered):
    """ast.parse() every .py file in tools/ and .claude/hooks/.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists:
        ``pass`` for files that parse, ``fail`` for files that do not,
        ``warn`` for files that cannot be opened or read.
    """
    results = {"pass": [], "fail": [], "warn": []}

    for rel in sorted(discovered):
        if not rel.startswith(("tools/", ".claude/hooks/")):
            continue
        if not rel.endswith(".py"):
            continue

        full = os.path.join(base, rel)
        try:
            # Read raw bytes and let the parser do the decoding: it honours
            # a PEP 263 coding cookie, and reports undecodable source as a
            # SyntaxError instead of open() raising UnicodeDecodeError.
            with open(full, "rb") as f:
                source = f.read()
            ast.parse(source, filename=rel)
        except SyntaxError as e:
            results["fail"].append(f"{rel}: syntax error at line {e.lineno}: {e.msg}")
        except ValueError as e:
            # e.g. source containing null bytes on interpreters that raise
            # ValueError rather than SyntaxError for it.
            results["fail"].append(f"{rel}: cannot parse: {e}")
        except OSError:
            results["warn"].append(f"{rel}: cannot read")
        else:
            results["pass"].append(f"{rel}: valid Python")

    return results


def check_placeholders(base, discovered):
    """Scan production .md files for [FILL], TODO, FIXME, XXX, HACK.

    Discovered ``.md`` files are scanned unless their path contains
    ``archive`` (case-insensitive) or starts with ``templates/``. All
    markers except ``XXX`` are matched case-insensitively.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists: one
        ``warn`` per file listing the distinct markers found (sorted), or
        one ``pass`` per clean file. Unreadable files are skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    PLACEHOLDER_PATTERNS = [
        re.compile(r"\[FILL\]", re.IGNORECASE),
        re.compile(r"\bTODO\b", re.IGNORECASE),
        re.compile(r"\bFIXME\b", re.IGNORECASE),
        re.compile(r"\bXXX\b"),
        re.compile(r"\bHACK\b", re.IGNORECASE),
    ]

    for rel in sorted(discovered):
        if not rel.endswith(".md"):
            continue
        if "archive" in rel.lower():
            continue
        if rel.startswith("templates/"):
            continue

        full = os.path.join(base, rel)
        try:
            # Explicit UTF-8 with replacement so an undecodable byte cannot
            # raise UnicodeDecodeError, which the OSError handler would miss.
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        found = set()
        for pattern in PLACEHOLDER_PATTERNS:
            found.update(pattern.findall(content))

        if found:
            # Sort the distinct markers: joining a raw set yields a different
            # order from run to run (string hashing is randomised), which
            # makes the report non-reproducible.
            results["warn"].append(
                f"{rel}: contains placeholders: {', '.join(sorted(found))}"
            )
        else:
            results["pass"].append(f"{rel}: no placeholders")

    return results


def check_table_duplication(base, discovered):
    """Detect tables that should be CONSTANTS.md references.

    Every discovered ``.md`` file except ``CONSTANTS.md`` is searched for
    three table signatures. The patterns use ``re.DOTALL``, so the parts of
    a signature may be separated by any amount of text, including newlines.

    Args:
        base: Directory that the relative paths in ``discovered`` are joined
            onto.
        discovered: Iterable of relative file paths.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists. For
        each signature found, a ``pass`` is recorded if the file mentions
        ``CONSTANTS.md`` anywhere, otherwise a ``warn``. Files without any
        signature contribute nothing; unreadable files are skipped silently.
    """
    results = {"pass": [], "fail": [], "warn": []}

    # Compiled once up front instead of once per file.
    TABLE_PATTERNS = [
        (re.compile(pattern_str, re.IGNORECASE | re.DOTALL), desc)
        for pattern_str, desc in [
            (r"SEQ\s+1.*1-8.*Status Quo", "8-sequence skeleton table"),
            (r"Ep\s+10.*First.Crack", "emotional beat schedule table"),
            (r"SETUP.*40-55", "treatment word count table"),
        ]
    ]

    for rel in sorted(discovered):
        if not rel.endswith(".md"):
            continue
        if rel == "CONSTANTS.md":
            continue

        full = os.path.join(base, rel)
        try:
            # Explicit UTF-8 with replacement so an undecodable byte cannot
            # raise UnicodeDecodeError, which the OSError handler would miss.
            with open(full, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        has_reference = "CONSTANTS.md" in content
        for pattern, desc in TABLE_PATTERNS:
            if not pattern.search(content):
                continue
            if has_reference:
                results["pass"].append(f"{rel}: references CONSTANTS.md for {desc}")
            else:
                results["warn"].append(
                    f"{rel}: contains {desc} — should reference CONSTANTS.md instead"
                )

    return results


# ═══════════════════════════════════════════════════════════════
# REGISTRATION
# ═══════════════════════════════════════════════════════════════

# One registration per check defined above, in the order of checks 7-18
# listed in the module docstring. Arguments are: a short key, the display
# title, the check function, and the group name "semantic".
# NOTE(review): `quick=True` is set only on the inventory, JSON and Python
# checks; its exact meaning is defined by register_check in the package
# __init__ (presumably "include in a fast run") - confirm there.
register_check("stale", "Stale Value Detection", check_stale_values, "semantic")
register_check("counting", "Shared Counting Consistency", check_shared_counting, "semantic")
register_check("hooks", "Hook Safety Patterns", check_hook_safety, "semantic")
register_check("templates", "Template Value Consistency", check_template_values, "semantic")
register_check("examples", "Agent Example Plausibility", check_agent_examples, "semantic")
register_check("hardcoded", "Hardcoded Constants Detection", check_hardcoded_constants, "semantic")
register_check("inventory", "Inventory Count Verification", check_inventory_counts, "semantic", quick=True)
register_check("exitcodes", "Exit Code Convention", check_exit_codes, "semantic")
register_check("json", "JSON Syntax Validation", check_json_syntax, "semantic", quick=True)
register_check("python", "Python Syntax Validation", check_python_syntax, "semantic", quick=True)
register_check("placeholders", "Placeholder Detection", check_placeholders, "semantic")
register_check("duplicates", "Table Duplication Detection", check_table_duplication, "semantic")

# Section aliases: "syntax" groups the "json" and "python" check keys
# registered above.
register_section("syntax", ["json", "python"])
