"""Shared Linear queue helpers.

This module is deliberately limited to pure derivation helpers plus thin
filesystem I/O. Producers enqueue JSON events; the live drainer is the only
component that may talk to Linear.
"""

from __future__ import annotations

import hashlib
import json
import os
import re
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Default on-disk layout of the queue. INBOX, DONE and LEDGER are only the
# default arguments of the filesystem helpers below; callers may pass other
# paths explicitly.
DEFAULT_QUEUE_ROOT = Path.home() / "Dropbox/Claude_Config/maintenance/linear-queue"
# Directory atomic_write_inbox() writes `<event_id>.json` files into and
# read_inbox() scans.
INBOX = DEFAULT_QUEUE_ROOT / "inbox"
# Directory move_to_done() relocates event files into.
DONE = DEFAULT_QUEUE_ROOT / "done"
# JSON-lines file appended to by append_ledger() and read by load_ledger().
LEDGER = DEFAULT_QUEUE_ROOT / "ledger.jsonl"

# Envelope schema version: stamped onto events by build_event() and required
# to match in validate_event().
SCHEMA_VERSION = 1

# The exact set of top-level keys an event must carry. validate_event()
# reports every key missing from this set and every key outside it.
EVENT_KEYS = {
    "schema_version",
    "event_id",
    "event_ts",
    "source_type",
    "session_id",
    "finding_key",
    "category",
    "subject_kind",
    "subject_id",
    "file",
    "title",
    "evidence",
    "recommendation",
    "pr_url",
    "branch",
    "head_sha",
}

# Extra keyword arguments build_event() accepts as derivation inputs. They are
# popped before the envelope is assembled, so they never appear in the event.
BUILD_ONLY_KEYS = {
    "law_or_rule",
    "normalized_anchor",
    "producer_run_scope",
}

# Allowed values for an event's "source_type" field.
SOURCE_TYPES = {"manual_file", "nightwatch", "pr_untracked_work"}
# Allowed values for an event's "category" field. validate_event() compares
# case-sensitively (note the mixed-case "SSOT").
CATEGORIES = {
    "untracked_work",
    "architectural-law",
    "infra",
    "process",
    "SSOT",
    "bug",
    "efficiency",
    "dead-code",
    "drift",
    "hygiene",
    "scanner",
}
# Allowed non-null values for an event's "subject_kind" field.
SUBJECT_KINDS = {"pr", "repo", "hook", "path", "file"}
# Ledger states that is_already_handled() treats as "nothing more to do".
TERMINAL_LEDGER_STATES = {"filed", "deduped"}

# Shape of the digests produced by sha256_text(): a literal "sha256:" prefix
# followed by 64 lowercase hex characters. Used by _is_sha256().
_SHA256_RE = re.compile(r"^sha256:[0-9a-f]{64}$")


def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    now = datetime.now(tz=timezone.utc)
    # The trailing "Z" is a literal suffix, not a strftime directive.
    return f"{now:%Y-%m-%dT%H:%M:%S}Z"


def sha256_text(text: str) -> str:
    """Return ``"sha256:"`` followed by the hex SHA-256 of *text* encoded as UTF-8."""
    digest = hashlib.sha256(text.encode("utf-8"))
    return f"sha256:{digest.hexdigest()}"


def normalize_text(value: str) -> str:
    """Return *value* with each whitespace run collapsed to one space and ends trimmed.

    Any falsy input (``""``, ``None``) yields ``""``. Nothing other than
    whitespace is touched, so code fragments and quotes survive unchanged.
    """
    collapsed = re.sub(r"\s+", " ", value) if value else ""
    return collapsed.strip()


def derive_claim_fingerprint(title: str, evidence: str, recommendation: str) -> str:
    """Hash a finding's title, evidence and recommendation into one digest.

    Each field is whitespace-normalized first, so differences in spacing or
    line wrapping alone do not change the result. The normalized fields are
    joined with ``|`` in the order title, evidence, recommendation.
    """
    normalized = (
        normalize_text(field) for field in (title, evidence, recommendation)
    )
    return sha256_text("|".join(normalized))


# Patterns used by derive_claim_signature().
# Contents of a single-backtick code span (the capture excludes the backticks).
_CLAIM_SIG_BACKTICK_RE = re.compile(r"`([^`]+)`")
# Any identifier-shaped token: a letter or underscore, then letters/digits/underscores.
_CLAIM_SIG_IDENT_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
# A lowercase letter immediately followed by an uppercase one, i.e. a camelCase hump.
_CLAIM_SIG_CAMEL_RE = re.compile(r"[a-z][A-Z]")
# Dotted attribute access (a.b.c) — reduce to its FINAL component so the
# receiver/class can't win the lex race over the called method/attribute.
_CLAIM_SIG_DOTTED_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(?:\.[A-Za-z_][A-Za-z0-9_]*)+")
# Quoted string-literal CONTENTS inside a span are arguments/values, NOT the
# claim subject. Matches a whole double- or single-quoted literal, quotes included.
_CLAIM_SIG_STRLIT_RE = re.compile(r"\"[^\"]*\"|'[^']*'")
# Strip ONLY explicit "line(s) N" / "line N-M" refs (hyphen or en dash, case-
# insensitive). Keep paths, ports, ratios, bare :N/@N, and other semantic tokens.
_CLAIM_SIG_LINEREF_RE = re.compile(r"\blines?\b\s*\d+(?:\s*[-–]\s*\d+)?", re.I)


def derive_claim_signature(evidence: str) -> str:
    """Project a finding's evidence onto a short, title-free signature.

    Code symbols are collected from backtick spans in *evidence*. A symbol is
    salient when it is longer than two characters and contains an underscore
    or a camelCase hump. Symbols are lowercased, and the alphabetically first
    one is returned, preferring names that do not start with an underscore.

    When no salient symbol is found, the whole evidence is lowercased, explicit
    "line N" references are removed, whitespace is normalized, and the result
    is ``"prose:"`` plus the first 16 hex characters of its SHA-256. If nothing
    is left after that cleanup, ``""`` is returned.
    """
    source = evidence or ""

    symbols: set[str] = set()
    for code_span in _CLAIM_SIG_BACKTICK_RE.findall(source):
        # Blank out quoted literals first so their contents are never read as
        # identifiers, then keep only the last component of dotted accesses.
        without_literals = _CLAIM_SIG_STRLIT_RE.sub(" ", code_span)
        tails_only = _CLAIM_SIG_DOTTED_RE.sub(
            lambda match: match.group(0).rpartition(".")[2], without_literals
        )
        symbols.update(
            token.lower()
            for token in _CLAIM_SIG_IDENT_RE.findall(tails_only)
            if len(token) > 2
            and ("_" in token or _CLAIM_SIG_CAMEL_RE.search(token) is not None)
        )

    if symbols:
        public = [name for name in symbols if not name.startswith("_")]
        # Fall back to the private names only when no public one was cited.
        return min(public or symbols)

    prose = normalize_text(_CLAIM_SIG_LINEREF_RE.sub("", source.lower()))
    if not prose:
        return ""
    hex_digest = sha256_text(prose)[len("sha256:"):]
    return "prose:" + hex_digest[:16]


def effective_claim_signature(evidence: str, normalized_anchor: str | None) -> str:
    """Return the claim signature, falling back to a namespaced anchor.

    The evidence-derived signature wins whenever it is non-empty. Otherwise a
    whitespace-normalized, lowercased *normalized_anchor* is returned with an
    ``"anchor:"`` prefix. ``""`` is returned when the anchor is ``None``,
    blank, or exactly ``"anchor_not_found"``.
    """
    signature = derive_claim_signature(evidence)
    if signature:
        return signature

    if normalized_anchor is None:
        return ""
    anchor = normalize_text(str(normalized_anchor))
    if not anchor or anchor == "anchor_not_found":
        return ""
    return "anchor:" + anchor.lower()


def derive_finding_key_code(
    category: str,
    file: str | None,
    claim_signature: str,
) -> str:
    """Derive the finding key for a file-scoped finding.

    Hashes ``category|file|claim_signature`` with the category lowercased and
    a missing *file* treated as the empty string.
    """
    category_part = category.lower() if category else ""
    file_part = file if file else ""
    return sha256_text("|".join((category_part, file_part, claim_signature)))


def derive_finding_key_subject(
    category: str,
    subject_kind: str,
    subject_id: str,
    claim_signature: str,
) -> str:
    """Derive the finding key for a subject-scoped finding.

    Hashes ``category|subject_kind|subject_id|claim_signature``. Category and
    subject kind are lowercased; the subject id keeps its case. Missing values
    contribute an empty string.
    """
    category_part = category.lower() if category else ""
    kind_part = subject_kind.lower() if subject_kind else ""
    id_part = subject_id if subject_id else ""
    return sha256_text(
        "|".join((category_part, kind_part, id_part, claim_signature))
    )


def derive_event_id(
    finding_key: str,
    source_type: str,
    producer_run_scope: str,
) -> str:
    """Hash ``finding_key|source_type|producer_run_scope`` into an event id."""
    components = (finding_key, source_type, producer_run_scope)
    return sha256_text("|".join(components))


def build_event(**fields: Any) -> dict:
    """Assemble an inbox event envelope from keyword fields and validate it.

    Accepts any key in EVENT_KEYS plus the derivation-only inputs in
    BUILD_ONLY_KEYS. Optional envelope fields default to ``None``,
    ``schema_version`` to SCHEMA_VERSION and ``event_ts`` to the current UTC
    time; explicitly passed fields override those defaults. ``finding_key``
    and ``event_id`` are derived when absent or ``None``.

    Raises:
        ValueError: if an unknown key is passed or the assembled event fails
            validate_event().
    """
    allowed = EVENT_KEYS | BUILD_ONLY_KEYS
    unknown = sorted(key for key in fields if key not in allowed)
    if unknown:
        raise ValueError(f"unknown event keys: {', '.join(unknown)}")

    # Derivation-only inputs are removed from `fields` so they never reach the
    # envelope itself.
    derivation_inputs = {
        key: fields.pop(key, None)
        for key in ("law_or_rule", "normalized_anchor", "producer_run_scope")
    }

    event = {
        "schema_version": SCHEMA_VERSION,
        "event_ts": utc_now_iso(),
        "session_id": None,
        "subject_kind": None,
        "subject_id": None,
        "file": None,
        "pr_url": None,
        "branch": None,
        "head_sha": None,
        **fields,
    }

    _derive_missing_identity(event, **derivation_inputs)

    problems = validate_event(event)
    if problems:
        raise ValueError("; ".join(problems))
    return event


def validate_event(event: dict) -> list[str]:
    """Validate an untrusted inbox event.

    Args:
        event: The decoded JSON value to check. Anything other than a dict is
            rejected outright.

    Returns:
        Human-readable problem descriptions; an empty list means the event is
        valid. Missing or unknown top-level keys are reported on their own,
        without the per-field checks, because those checks index the required
        keys directly.
    """
    if not isinstance(event, dict):
        return ["event must be a JSON object"]

    problems: list[str] = []
    present = set(event)
    missing = sorted(EVENT_KEYS - present)
    unknown = sorted(present - EVENT_KEYS)
    if missing:
        problems.append(f"missing required keys: {', '.join(missing)}")
    if unknown:
        problems.append(f"unknown top-level keys: {', '.join(unknown)}")

    if problems:
        return problems

    def in_vocabulary(value: object, vocabulary: set) -> bool:
        # Check the type before the membership test: untrusted JSON may carry
        # a list or object here, and testing an unhashable value against a set
        # raises TypeError instead of yielding a validation problem.
        return isinstance(value, str) and value in vocabulary

    if event["schema_version"] != SCHEMA_VERSION:
        problems.append(f"schema_version must be {SCHEMA_VERSION}")
    if not _is_sha256(event["event_id"]):
        problems.append("event_id must be sha256:<64 lowercase hex>")
    if not _is_sha256(event["finding_key"]):
        problems.append("finding_key must be sha256:<64 lowercase hex>")
    if not isinstance(event["event_ts"], str) or not event["event_ts"]:
        problems.append("event_ts must be a non-empty string")
    if not in_vocabulary(event["source_type"], SOURCE_TYPES):
        problems.append(f"source_type must be one of: {', '.join(sorted(SOURCE_TYPES))}")
    if not in_vocabulary(event["category"], CATEGORIES):
        problems.append(f"category must be one of: {', '.join(sorted(CATEGORIES))}")

    subject_kind = event["subject_kind"]
    if subject_kind is not None and not in_vocabulary(subject_kind, SUBJECT_KINDS):
        problems.append(f"subject_kind must be null or one of: {', '.join(sorted(SUBJECT_KINDS))}")

    # Free-text fields are required strings (empty is allowed).
    for key in ("title", "evidence", "recommendation"):
        if not isinstance(event[key], str):
            problems.append(f"{key} must be a string")

    # Optional fields must be present but may be null.
    for key in ("session_id", "subject_id", "file", "pr_url", "branch", "head_sha"):
        if event[key] is not None and not isinstance(event[key], str):
            problems.append(f"{key} must be a string or null")

    return problems


def atomic_write_inbox(event: dict, inbox: Path = INBOX) -> Path:
    """Validate *event* and write it to ``<inbox>/<event_id>.json``.

    The JSON is written to a temporary file in the same directory, flushed and
    fsynced, then moved into place with ``os.replace``. The directory is
    created if needed.

    Returns:
        The path of the written event file.

    Raises:
        ValueError: if the event fails validate_event().
    """
    problems = validate_event(event)
    if problems:
        raise ValueError("; ".join(problems))

    event_id = event["event_id"]
    inbox.mkdir(parents=True, exist_ok=True)
    destination = inbox / f"{event_id}.json"
    serialized = json.dumps(event, sort_keys=True, separators=(",", ":")) + "\n"

    # The staging name ends in ".tmp", so read_inbox()'s "*.json" glob never
    # picks up a half-written file.
    handle, staging_name = tempfile.mkstemp(
        dir=inbox,
        prefix=f".{event_id}.",
        suffix=".tmp",
        text=True,
    )
    staging_path = Path(staging_name)
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as stream:
            stream.write(serialized)
            stream.flush()
            os.fsync(stream.fileno())
        os.replace(staging_path, destination)
    finally:
        # Only present here if the write or the rename failed.
        if staging_path.exists():
            staging_path.unlink()
    return destination


def read_inbox(inbox: Path = INBOX) -> list[dict | tuple[str, str]]:
    """Load every ``*.json`` file in *inbox*, in sorted path order.

    Returns:
        One item per file: the event dict when the file parses and passes
        validate_event(), otherwise a ``(path, reason)`` tuple describing why
        it was rejected. A missing inbox directory yields an empty list.
    """
    if not inbox.exists():
        return []

    items: list[dict | tuple[str, str]] = []
    for path in sorted(inbox.glob("*.json")):
        try:
            with open(path, "r", encoding="utf-8") as fh:
                event = json.load(fh)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            # (bytes that are not valid UTF-8). Either way the file is
            # reported as a bad item rather than aborting the whole scan.
            items.append((str(path), str(exc)))
            continue

        # validate_event() itself rejects non-dict payloads.
        problems = validate_event(event)
        if problems:
            items.append((str(path), "; ".join(problems)))
            continue
        items.append(event)
    return items


def append_ledger(row: dict, ledger: Path = LEDGER) -> None:
    """Append *row* to the ledger as one compact, key-sorted JSON line.

    The ledger's parent directory is created if it does not exist.
    """
    ledger.parent.mkdir(parents=True, exist_ok=True)
    with open(ledger, "a", encoding="utf-8") as stream:
        encoded = json.dumps(row, sort_keys=True, separators=(",", ":"))
        stream.write(f"{encoded}\n")


def load_ledger(ledger: Path = LEDGER) -> list[dict]:
    """Read the JSON-lines ledger and return its rows in file order.

    Blank lines are skipped. A missing ledger file yields an empty list. A
    line that is not valid JSON raises ``json.JSONDecodeError``.
    """
    if not ledger.exists():
        return []

    with open(ledger, "r", encoding="utf-8") as stream:
        stripped_lines = (raw.strip() for raw in stream)
        return [json.loads(entry) for entry in stripped_lines if entry]


def latest_ledger_state(finding_key: str, ledger_rows: list[dict]) -> str | None:
    latest: str | None = None
    for row in ledger_rows:
        if row.get("finding_key") == finding_key:
            state = row.get("state")
            latest = state if isinstance(state, str) else None
    return latest


def is_already_handled(finding_key: str, ledger_rows: list[dict]) -> bool:
    """Return True when the latest ledger state for *finding_key* is terminal."""
    state = latest_ledger_state(finding_key, ledger_rows)
    return state in TERMINAL_LEDGER_STATES


def move_to_done(event_id: str, inbox: Path = INBOX, done: Path = DONE) -> None:
    """Move ``<event_id>.json`` from *inbox* to *done*, creating *done* if needed.

    Uses ``os.replace``, so an existing file of the same name in *done* is
    overwritten.
    """
    done.mkdir(parents=True, exist_ok=True)
    filename = f"{event_id}.json"
    os.replace(inbox / filename, done / filename)


def _derive_missing_identity(
    event: dict,
    *,
    law_or_rule: object,
    normalized_anchor: object,
    producer_run_scope: object,
) -> None:
    """Fill in ``finding_key`` and ``event_id`` on *event* when they are absent.

    Mutates *event* in place. A key counts as absent when it is missing or
    ``None``; values already supplied by the caller are left untouched.
    ``event_id`` is only derived when both a finding key and a source type are
    available. *law_or_rule* is accepted but not used by this function.
    """
    if event.get("finding_key") is None:
        anchor = None if normalized_anchor is None else str(normalized_anchor)
        signature = effective_claim_signature(
            str(event.get("evidence") or ""),
            anchor,
        )
        category = str(event.get("category") or "")
        subject_kind = event.get("subject_kind")
        subject_id = event.get("subject_id")
        if subject_kind and subject_id:
            # A fully specified subject takes precedence over the file path.
            event["finding_key"] = derive_finding_key_subject(
                category,
                str(subject_kind),
                str(subject_id),
                signature,
            )
        else:
            # File-scoped key; a missing file contributes an empty string.
            event["finding_key"] = derive_finding_key_code(
                category,
                str(event.get("file") or ""),
                signature,
            )

    if event.get("event_id") is not None:
        return
    finding_key = event.get("finding_key")
    source_type = event.get("source_type")
    if not (finding_key and source_type):
        return
    scope = _default_producer_run_scope(event, producer_run_scope)
    event["event_id"] = derive_event_id(str(finding_key), str(source_type), scope)


def _default_producer_run_scope(event: dict, producer_run_scope: object) -> str:
    if producer_run_scope is not None:
        return str(producer_run_scope)
    if event.get("source_type") == "pr_untracked_work":
        return str(event.get("pr_url") or event.get("subject_id") or event.get("branch") or "")
    event_date = str(event.get("event_ts") or "")[:10]
    return "|".join(
        [
            str(event.get("finding_key") or ""),
            event_date,
            str(event.get("session_id") or ""),
        ]
    )


def _is_sha256(value: object) -> bool:
    """Return True when *value* is a string of the form ``sha256:<64 lowercase hex>``."""
    # fullmatch, not match: the pattern's `$` also matches just before a
    # trailing newline, so match() would accept "sha256:<hex>\n", and that
    # value would then end up inside an inbox filename.
    return isinstance(value, str) and _SHA256_RE.fullmatch(value) is not None
