"""
Dead code detection checks (39-42).

Checks:
  39. unreferenced_tools — .py files in tools/ not referenced in any .md, skill, agent, or other .py
  40. orphaned_editors — .html files in editors/ not reachable from console or docs
  41. unused_imports — Python files importing modules they never use (WARN, not FAIL)
  42. archive_dupes — Files in _archive/ (or an archive/ subdirectory) that are byte-identical to a same-named active file
"""

import ast
import hashlib
import os
import re

from . import register_check, register_fix, register_section


def check_unreferenced_tools(base, discovered):
    """Find .py files in tools/ not referenced by any .md, skill, agent, or other .py.

    A tool counts as referenced when its filename stem appears (as a plain
    substring) in any ``.md`` file, any file under ``.claude/skills/``, or any
    *other* ``.py`` file listed in ``discovered``.  The tool's own source is
    excluded from the search, so a usage line in its own docstring cannot
    hide a tool that nothing else mentions.

    Args:
        base: Root directory that the paths in ``discovered`` are relative to.
        discovered: Collection of relative file paths using ``/`` separators.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists.
        Unreferenced tools are reported under ``"warn"``; this check never
        adds to ``"fail"``.
    """
    results = {"pass": [], "fail": [], "warn": []}

    def _read_text(rel):
        # Decode leniently: an undecodable byte in one file must not abort
        # the whole check (UnicodeDecodeError is not an OSError).
        try:
            with open(os.path.join(base, rel), encoding="utf-8",
                      errors="replace") as f:
                return f.read()
        except OSError:
            return None

    # Documentation / skill files, searched as one blob.
    doc_parts = []
    # Python sources kept per path so a tool can be excluded from its own search.
    py_texts = {}
    for rel in discovered:
        is_doc = rel.endswith(".md") or rel.startswith(".claude/skills/")
        is_py = rel.endswith(".py")
        if not (is_doc or is_py):
            continue
        text = _read_text(rel)
        if text is None:
            continue
        if is_doc:
            doc_parts.append(text)
        if is_py:
            py_texts[rel] = text
    doc_corpus = "\n".join(doc_parts)

    # Known internal/utility files that are legitimately unreferenced
    KNOWN_INTERNAL = {
        "engine_check.py",    # Self-referential
        "__init__.py",         # Package files
        "structural.py", "semantic.py", "deep_logic.py",  # engine_checks submodules
        "security.py", "gui_integrity.py",  # engine_checks submodules
        "visual_pipeline.py", "dead_code.py", "documentation.py",  # engine_checks submodules
    }

    tool_files = [
        rel for rel in discovered
        if rel.startswith("tools/") and rel.endswith(".py")
        and not rel.startswith("tools/engine_checks/")
        and not rel.startswith("tools/_archive/")
    ]

    for rel in tool_files:
        basename = os.path.basename(rel)
        if basename in KNOWN_INTERNAL:
            continue

        # The stem is a substring of the basename, so one substring test
        # covers both "foo.py" mentions and "import foo" style references.
        stem = os.path.splitext(basename)[0]
        referenced = stem in doc_corpus or any(
            stem in text
            for other_rel, text in py_texts.items()
            if other_rel != rel
        )
        if referenced:
            results["pass"].append(f"{basename}: referenced")
        else:
            results["warn"].append(
                f"Unreferenced tool: {basename} — not found in any .md, skill, or import"
            )

    return results


def check_orphaned_editors(base, discovered):
    """Find .html files in editors/ not reachable from console or docs.

    An editor page counts as reachable when its basename appears (as a plain
    substring) in ``editors/prepro-console.html``, ``editors/serve.py``,
    ``CLAUDE.md`` or any ``.md`` file in ``discovered``.  A small set of
    known entry-point pages always passes.

    Args:
        base: Root directory that the paths in ``discovered`` are relative to.
        discovered: Collection of relative file paths using ``/`` separators.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists.
        Orphaned pages (and a missing ``editors/`` directory) are reported
        under ``"warn"``; this check never adds to ``"fail"``.
    """
    results = {"pass": [], "fail": [], "warn": []}

    if not os.path.isdir(os.path.join(base, "editors")):
        results["warn"].append("editors/ directory not found")
        return results

    # Console + server + docs form the reference corpus.
    reference_files = [
        "editors/prepro-console.html",
        "editors/serve.py",
        "CLAUDE.md",
    ]
    reference_files.extend(rel for rel in discovered if rel.endswith(".md"))

    chunks = []
    # dict.fromkeys de-duplicates while keeping order, so CLAUDE.md is not
    # read twice when it also shows up in ``discovered``.
    for rel in dict.fromkeys(reference_files):
        try:
            # Decode leniently: a stray non-UTF-8 byte must not crash the
            # check (UnicodeDecodeError is not an OSError).
            with open(os.path.join(base, rel), encoding="utf-8",
                      errors="replace") as f:
                chunks.append(f.read())
        except OSError:
            # Missing or unreadable reference files are simply not searched.
            pass
    corpus = "\n".join(chunks)

    # All .html files under editors/ (including nested folders such as _standalone/)
    html_files = [
        rel for rel in discovered
        if rel.startswith("editors/") and rel.endswith(".html")
    ]

    KNOWN_ENTRY = {
        "prepro-console.html",  # Main entry point
        "shootout_reviewer.html",  # A/B model comparison tool
        "voice_casting.html",  # Voice casting + TTS preview
    }

    for rel in html_files:
        basename = os.path.basename(rel)
        if basename in KNOWN_ENTRY:
            results["pass"].append(f"{basename}: entry point")
        elif basename in corpus:
            results["pass"].append(f"{basename}: referenced")
        else:
            results["warn"].append(
                f"Orphaned editor: {basename} — not referenced from console or docs"
            )

    return results


def check_unused_imports(base, discovered):
    """Find Python files with imports that are never used in the file body.

    Reports as WARN since dynamic usage (getattr, globals) can't be detected.
    Only checks tools/ .py files.

    The test is textual: an imported name is flagged when it occurs at most
    once as a whole word anywhere in the file (i.e. only on its import line).
    Mentions in comments or strings therefore count as usage.

    Skipped entirely:
      * anything under ``engine_checks/`` and any ``__init__.py``
        (package files commonly import purely to re-export);
      * files that contain a ``from x import *`` anywhere;
      * files that cannot be read or parsed.
    ``from __future__ import ...`` names are never flagged: they are compiler
    directives and are not referenced again by design.

    Args:
        base: Root directory that the paths in ``discovered`` are relative to.
        discovered: Collection of relative file paths using ``/`` separators.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists; only
        ``"warn"`` is populated, one message per file with 1-5 flagged names.
    """
    results = {"pass": [], "fail": [], "warn": []}

    for rel in sorted(discovered):
        if not rel.startswith("tools/") or not rel.endswith(".py"):
            continue
        # Skip engine_checks submodules and __init__
        if "engine_checks/" in rel:
            continue
        basename = os.path.basename(rel)
        if basename == "__init__.py":
            continue

        try:
            with open(os.path.join(base, rel), encoding="utf-8",
                      errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError):
            # ValueError covers sources containing NUL bytes.
            continue

        # Collect the local names bound by import statements.
        imported_names = set()
        has_star_import = False
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imported_names.update(
                    alias.asname or alias.name.split(".")[0]
                    for alias in node.names
                )
            elif isinstance(node, ast.ImportFrom):
                if any(alias.name == "*" for alias in node.names):
                    # Can't track star imports — give up on the whole file,
                    # regardless of where the star import sits.
                    has_star_import = True
                    break
                if node.module == "__future__":
                    continue
                imported_names.update(
                    alias.asname or alias.name for alias in node.names
                )

        if has_star_import or not imported_names:
            continue

        # A name that occurs only once as a whole word appears solely on its
        # import line, so it is treated as unused.
        unused = sorted(
            name for name in imported_names
            if len(re.findall(r'\b' + re.escape(name) + r'\b', content)) <= 1
        )

        if unused and len(unused) <= 5:  # Only report if few (avoid noise)
            results["warn"].append(
                f"{basename}: potentially unused imports: {', '.join(unused)}"
            )

    return results


def check_archive_duplicates(base, discovered):
    """Find files in _archive/ that are identical to active files.

    A path is treated as archived when it contains ``_archive/`` or
    ``/archive/``; every other path is active.  Each archived file is compared
    (by SHA-256 of its bytes) against *every* active file that shares its
    basename, so a duplicate is found even when several active files carry
    the same name (e.g. multiple ``README.md``).

    Args:
        base: Root directory that the paths in ``discovered`` are relative to.
        discovered: Collection of relative file paths using ``/`` separators.

    Returns:
        Dict with ``"pass"``, ``"fail"`` and ``"warn"`` message lists.
        Each identical pair yields one ``"warn"`` entry; otherwise a single
        ``"pass"`` summary is added.  This check never adds to ``"fail"``.
    """
    results = {"pass": [], "fail": [], "warn": []}

    active_by_name = {}   # basename -> [active relative paths]
    archive_files = {}    # archived relative path -> basename

    for rel in discovered:
        name = os.path.basename(rel)
        if "_archive/" in rel or "/archive/" in rel:
            archive_files[rel] = name
        else:
            active_by_name.setdefault(name, []).append(rel)

    if not archive_files:
        results["pass"].append("No archive files to check")
        return results

    # Pair every archived file with each active file of the same basename.
    name_matches = [
        (arch_rel, active_rel)
        for arch_rel, arch_base in archive_files.items()
        for active_rel in active_by_name.get(arch_base, ())
    ]

    digest_cache = {}

    def _digest(rel):
        # Hash each file at most once; None marks an unreadable file so the
        # pair is skipped (best effort) instead of aborting the check.
        if rel not in digest_cache:
            try:
                with open(os.path.join(base, rel), "rb") as f:
                    digest_cache[rel] = hashlib.sha256(f.read()).hexdigest()
            except OSError:
                digest_cache[rel] = None
        return digest_cache[rel]

    duplicates = []
    for arch_rel, active_rel in name_matches:
        arch_hash = _digest(arch_rel)
        if arch_hash is not None and arch_hash == _digest(active_rel):
            duplicates.append((arch_rel, active_rel))

    if duplicates:
        for arch_rel, active_rel in duplicates:
            results["warn"].append(
                f"Archive duplicate: {arch_rel} is identical to {active_rel}"
            )
    else:
        results["pass"].append(
            f"Checked {len(name_matches)} archive/active name matches — no duplicates"
        )

    return results


# ═══════════════════════════════════════════════════════════════
# FIXES
# ═══════════════════════════════════════════════════════════════

def _import_clause_binding(clause, is_from_import):
    """Return the local name bound by one import clause (``a.b as c`` -> ``c``)."""
    clause = clause.strip()
    if " as " in clause:
        return clause.split(" as ")[-1].strip()
    if is_from_import:
        return clause
    # ``import a.b.c`` binds only the top-level package name ``a``.
    return clause.split(".")[0].strip()


def _strip_unused_from_import_line(line, unused_names):
    """Drop the clauses binding *unused_names* from a one-line import statement.

    Returns ``(new_line, removed_clauses)``.  ``new_line`` is ``None`` when
    every clause was removed; ``removed_clauses`` is empty when the line could
    not be matched and must be left untouched.
    """
    stripped = line.strip()
    if stripped.startswith("import "):
        head, clause_text, is_from = "import", stripped[7:], False
    elif stripped.startswith("from ") and " import " in stripped:
        module_part, clause_text = stripped.split(" import ", 1)
        head, is_from = f"{module_part} import", True
    else:
        return line, []

    clauses = [c.strip() for c in clause_text.split(",")]
    kept, removed = [], []
    for clause in clauses:
        if _import_clause_binding(clause, is_from) in unused_names:
            removed.append(clause)
        else:
            kept.append(clause)

    if not removed:
        return line, []
    if not kept:
        return None, removed
    indent = line[:len(line) - len(line.lstrip())]
    return f"{indent}{head} {', '.join(kept)}", removed


def _unused_top_level_imports(tree, content):
    """Map line number -> unused names for single-line, module-level imports.

    Returns an empty dict when the file contains any star import.  Imports
    nested in ``try``/``if``/function bodies are ignored: deleting them can
    leave an empty block or remove an intentional availability probe.
    """
    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom) and any(
                alias.name == "*" for alias in node.names):
            return {}

    unused = {}
    for node in tree.body:
        if isinstance(node, ast.Import):
            names = [a.asname or a.name.split(".")[0] for a in node.names]
        elif isinstance(node, ast.ImportFrom):
            if node.module == "__future__":
                # Compiler directive; never referenced again by design.
                continue
            names = [a.asname or a.name for a in node.names]
        else:
            continue
        # Statements spanning several physical lines are not rewritten.
        if getattr(node, "end_lineno", node.lineno) != node.lineno:
            continue
        for name in names:
            # One whole-word occurrence means the name appears only on its
            # own import line.
            if len(re.findall(r'\b' + re.escape(name) + r'\b', content)) <= 1:
                unused.setdefault(node.lineno, []).append(name)
    return unused


def fix_unused_imports(base, discovered):
    """Remove unused imports from Python files in tools/.

    Only removes imports where the name appears exactly once (the import line).
    Skips files with star imports, __init__.py, and engine_checks/ submodules.

    Additional safety rules:
      * only module-level, single-line import statements are edited;
      * ``from __future__`` imports are never touched;
      * files with more than five flagged import lines are left alone;
      * the rewritten source must still parse, otherwise the file is left
        unchanged and noted under ``"skipped"``;
      * the file's original line-ending style is kept when it is uniform.

    Args:
        base: Root directory that the paths in ``discovered`` are relative to.
        discovered: Collection of relative file paths using ``/`` separators.

    Returns:
        Dict with ``"fixed"`` (one message per edited line, recorded only
        after the file was written successfully) and ``"skipped"`` (files
        that had candidates but were deliberately not modified).
    """
    results = {"fixed": [], "skipped": []}

    for rel in sorted(discovered):
        if not rel.startswith("tools/") or not rel.endswith(".py"):
            continue
        if "engine_checks/" in rel:
            continue
        basename = os.path.basename(rel)
        if basename == "__init__.py":
            continue

        full = os.path.join(base, rel)
        try:
            # Strict decoding: never rewrite a file we could not read exactly.
            with open(full, encoding="utf-8") as f:
                content = f.read()
                seen_newlines = f.newlines
        except (OSError, UnicodeDecodeError):
            continue

        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError):
            continue

        unused_lines = _unused_top_level_imports(tree, content)
        if not unused_lines:
            continue
        if len(unused_lines) > 5:
            results["skipped"].append(
                f"{basename}: {len(unused_lines)} import lines flagged — "
                f"too many to auto-fix"
            )
            continue

        # Rebuild the file line by line, editing only the flagged lines.
        messages = []
        new_lines = []
        for i, line in enumerate(content.split("\n"), 1):
            names = unused_lines.get(i)
            if names:
                new_line, removed = _strip_unused_from_import_line(line, names)
                if removed:
                    if new_line is None:
                        messages.append(
                            f"{basename}:{i} removed `{line.strip()}`")
                        continue
                    messages.append(
                        f"{basename}:{i} removed unused: {', '.join(removed)}")
                    line = new_line
            new_lines.append(line)

        if not messages:
            continue

        new_content = "\n".join(new_lines)
        try:
            ast.parse(new_content)
        except (SyntaxError, ValueError):
            results["skipped"].append(
                f"{basename}: rewrite would not parse — left unchanged")
            continue

        # Re-emit the original newline style when the file used a single one;
        # otherwise fall back to the platform default.
        newline = seen_newlines if isinstance(seen_newlines, str) else None
        try:
            with open(full, "w", encoding="utf-8", newline=newline) as f:
                f.write(new_content)
        except OSError as exc:
            results["skipped"].append(f"{basename}: could not write ({exc})")
            continue

        results["fixed"].extend(messages)

    return results


# ═══════════════════════════════════════════════════════════════
# REGISTRATION
# ═══════════════════════════════════════════════════════════════

# Register the four dead-code checks. Arguments, in order: check id, display
# title, check function, and the string "dead" — presumably the section key
# the check belongs to (it matches the register_section call below); confirm
# against register_check's definition in this package.
register_check("unreferenced_tools", "Unreferenced Tools", check_unreferenced_tools, "dead")
register_check("orphaned_editors", "Orphaned Editors", check_orphaned_editors, "dead")
register_check("unused_imports", "Unused Imports", check_unused_imports, "dead")
# NOTE: this id is abbreviated to "archive_dupes", unlike the function name.
register_check("archive_dupes", "Archive Duplicates", check_archive_duplicates, "dead")

# Register the fixer under "unused_imports", the same id as the check above.
register_fix("unused_imports", fix_unused_imports)

# Declare the "dead" section with the four check ids, in the order listed.
register_section("dead", [
    "unreferenced_tools", "orphaned_editors", "unused_imports", "archive_dupes",
])
