"""Tests for assert_no_proper_nouns — name-set-INDEPENDENT hardening (REC-72 D0d §29).

Audit assertion #4 must hard-fail on ANY surviving capitalized proper noun in a
bound video prompt, not only on KNOWN character names. These tests pin three
behaviors: known names still raise (the preserved subset), unknown proper nouns
now raise too, and legitimate caps (sentence-initial words, cinematic/equipment
brands, @ImageN tokens, shot abbreviations) do NOT false-positive.
"""

import pytest

from recoil.pipeline.tools.audit_assertions import assert_no_proper_nouns


def _payload(prompt: str) -> dict:
    """Wrap *prompt* in the minimal payload dict handed to the assertion."""
    return dict(prompt=prompt)


def _shot(characters=None) -> dict:
    """Build a minimal shot dict whose ``asset_data`` carries *characters*.

    Any falsy value (``None``, an empty list) is replaced by a fresh empty list.
    """
    roster = characters if characters else []
    return {"asset_data": {"characters": roster}}


# ── Subset 1: known character names still raise (preserved behavior) ──────────


def test_known_character_name_raises():
    """A name listed in the shot's characters must still fail the assertion."""
    bound = _payload("The subject walks. Jade enters the room.")
    shot = _shot([{"char_id": "Jade"}])
    with pytest.raises(AssertionError, match="proper noun"):
        assert_no_proper_nouns(bound, "video_i2v", "seeddance-2.0", shot)


# ── Subset 2: name-set-INDEPENDENT scan — UNKNOWN proper nouns now raise ──────


def test_unknown_proper_noun_with_empty_characters_raises():
    """Core §29 hardening: an UNKNOWN proper noun raises with characters=[].

    No known-name match is possible here, which is exactly the case the old
    name-DEPENDENT check let leak past fal's content filter. The name sits in
    the INTERIOR of the sentence (not sentence-initial) — the realistic leak
    shape of a stray name surviving mid-sentence.
    """
    bound = _payload("The subject watches as Alice crosses the room.")
    no_cast = _shot(characters=[])
    with pytest.raises(AssertionError, match="unknown proper noun"):
        assert_no_proper_nouns(bound, "video_i2v", "seeddance-2.0", no_cast)


def test_unknown_interior_proper_noun_raises():
    """An unknown mid-sentence name also raises under the r2v_multi modality."""
    bound = _payload("The subject walks toward Bartholomew at the far wall.")
    no_cast = _shot(characters=[])
    with pytest.raises(AssertionError, match="unknown proper noun"):
        assert_no_proper_nouns(bound, "r2v_multi", "seeddance-2.0", no_cast)


# ── Legitimate caps must NOT false-positive ───────────────────────────────────


def test_sentence_initial_caps_do_not_raise():
    """Capitals that merely open a sentence are ordinary prose and must pass."""
    # Every capitalized word below is the first word of its sentence.
    prose = "The subject walks. Camera pushes in. Lights flicker overhead."
    no_cast = _shot(characters=[])
    assert_no_proper_nouns(_payload(prose), "video_i2v", "seeddance-2.0", no_cast)


def test_cinematic_brand_caps_do_not_raise():
    """Film-stock / camera brand names from real builder output must pass."""
    # Adjacent literals concatenate into the single builder-style prompt.
    builder_prompt = (
        "Cinematic 5-second video clip. "
        "Camera: Medium shot. "
        "Shot on Kodak Vision3 500T. "
        "Cinematic, high production value."
    )
    no_cast = _shot(characters=[])
    assert_no_proper_nouns(
        _payload(builder_prompt), "video_i2v", "seeddance-2.0", no_cast
    )


def test_image_ref_tokens_do_not_raise():
    """@ImageN reference tokens are not proper nouns and must pass."""
    # Adjacent literals concatenate into one prompt; @Image1 appears both
    # sentence-initial and mid-sentence.
    ref_prompt = (
        "@Image1 as the first frame. "
        "The subject in @Image1 remains in frame. "
        "Same person as @Image1."
    )
    no_cast = _shot(characters=[])
    assert_no_proper_nouns(
        _payload(ref_prompt), "video_i2v", "seeddance-2.0", no_cast
    )


def test_non_video_modality_is_noop():
    """Only video modalities are scanned; an image prompt with a name passes.

    The name is deliberately placed mid-sentence. A sentence-initial name would
    be indistinguishable from ordinary sentence-initial capitalization (which
    test_sentence_initial_caps_do_not_raise shows is exempt), so the test would
    pass even if the modality gate were missing. This exact prompt raises under
    "video_i2v" in test_unknown_proper_noun_with_empty_characters_raises, so
    the only thing that can make it pass here is the non-video modality.
    """
    bound = _payload("The subject watches as Alice crosses the room.")
    no_cast = _shot(characters=[])
    assert_no_proper_nouns(bound, "image_t2i", "seedream-v4.5", no_cast)


def test_empty_prompt_is_noop():
    """An empty prompt string must pass without raising."""
    blank = _payload("")
    no_cast = _shot([])
    assert_no_proper_nouns(blank, "video_i2v", "seeddance-2.0", no_cast)