#!/usr/bin/env python3
"""talking_shot.py — generate a lip-synced talking-head video by feeding
Seedance R2V a blank video that carries the dialogue audio + an identity
reference image + a prompt.

Seedance reads the audio off the reference video, ignores the blank
visuals, and generates the visual content (including mouth movement
synced to the dialogue) from the identity ref + prompt. Single forward
pass — no sync.so post-process.

Usage:
    python3 tools/talking_shot.py \\
        --project tartarus \\
        --identity-ref TORCH \\
        --audio /path/to/dialogue.mp3 \\
        --prompt "Torch leans toward camera, intensity rising"

Optional:
    --duration N        Override audio length (default: ffprobe of audio)
    --aspect-ratio R    9:16 / 16:9 / 1:1 (default: 9:16)
    --resolution R      480p / 720p / 1080p (default: 720p)
    --model M           Seedance variant (default: seeddance-2.0)
    --pass-id ID        Override auto-generated pass id
    --dry-run           Bundle blank+audio MP4 but don't dispatch
    --keep-bundle       Don't delete the temp blank+audio MP4 after dispatch
    --carrier-image P   Loop a still image as the carrier video instead of a blank frame
    --no-audio          Pass --no-audio through to dispatch_cli
"""

import argparse
import shutil
import subprocess
import sys
import tempfile
import time
from pathlib import Path

# Carrier-video frame size as (width, height), keyed by aspect ratio and then
# by resolution label. The label names the SHORT side of the frame, so every
# 9:16 entry is the transpose of the matching 16:9 entry. All values are even,
# which yuv420p H.264 encoding requires.
ASPECT_TO_DIMS = {
    "9:16": {"480p": (480, 854), "720p": (720, 1280), "1080p": (1080, 1920)},
    "16:9": {"480p": (854, 480), "720p": (1280, 720), "1080p": (1920, 1080)},
    "1:1":  {"480p": (480, 480), "720p": (720, 720),  "1080p": (1080, 1080)},
}

# Sibling script that performs the actual dispatch; resolved relative to this
# file so the tool works regardless of the current working directory.
DISPATCH_CLI = Path(__file__).parent / "dispatch_cli.py"


def _ffprobe_duration(audio_path: Path) -> float:
    """Return the container duration of *audio_path* in seconds, via ffprobe.

    Args:
        audio_path: Media file to inspect.

    Returns:
        The positive, finite duration ffprobe reports for the file.

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        ValueError: ffprobe produced no usable duration (empty output,
            "N/A", or a non-positive / non-finite number).
    """
    import math  # local import: only this helper needs it

    out = subprocess.check_output(
        # "-v error" (rather than "quiet") lets ffprobe's own diagnostics
        # reach stderr, so an unreadable file is explained, not just rejected.
        ["ffprobe", "-v", "error",
         "-show_entries", "format=duration",
         "-of", "csv=p=0", str(audio_path)],
        text=True,
    ).strip()
    try:
        seconds = float(out)
    except ValueError:
        # float()'s own message carries no file context; replace it.
        raise ValueError(
            f"ffprobe reported no usable duration for {audio_path}: {out!r}"
        ) from None
    if not math.isfinite(seconds) or seconds <= 0:
        raise ValueError(
            f"ffprobe reported a non-positive duration for {audio_path}: {out!r}"
        )
    return seconds


def _bundle_blank_with_audio(
    audio_path: Path,
    duration: float,
    width: int,
    height: int,
    out_path: Path,
) -> None:
    """Mux *audio_path* onto a solid-black video and write it to *out_path*.

    The video stream comes from ffmpeg's lavfi ``color`` source, sized
    ``width`` x ``height`` and lasting ``duration`` seconds. It is encoded
    with libx264 (yuv420p) and the audio with AAC; ``-shortest`` ends the
    output at the shorter of the two streams. An existing *out_path* is
    overwritten (``-y``).

    Raises:
        subprocess.CalledProcessError: ffmpeg exited non-zero. ffmpeg's error
            output is echoed to stderr and attached as ``.stderr``.
    """
    cmd = [
        "ffmpeg", "-y",
        # Only real errors go to stderr, so what we capture is the diagnosis
        # rather than the banner and progress chatter.
        "-hide_banner", "-loglevel", "error",
        "-f", "lavfi",
        "-i", f"color=black:s={width}x{height}:d={duration:.3f}",
        "-i", str(audio_path),
        "-c:v", "libx264",
        # Pin the pixel format explicitly, as the still-image bundler does.
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-shortest",
        str(out_path),
    ]
    try:
        subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",  # never fail on odd bytes in ffmpeg's output
        )
    except subprocess.CalledProcessError as exc:
        # Surface why ffmpeg failed; the exception alone only has the status.
        if exc.stderr:
            print(exc.stderr.rstrip(), file=sys.stderr)
        raise


def _bundle_image_with_audio(
    image_path: Path,
    audio_path: Path,
    duration: float,
    width: int,
    height: int,
    out_path: Path,
) -> None:
    """Mux *audio_path* onto a looped still image and write it to *out_path*.

    The image is looped (``-loop 1``), scaled up until it covers the
    ``width`` x ``height`` frame, cropped to exactly that size, and emitted
    at 24 fps in yuv420p. Output is capped at ``duration`` seconds (``-t``)
    and also ends with the shorter stream (``-shortest``). Video is encoded
    with libx264 and audio with AAC; an existing *out_path* is overwritten.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited non-zero. ffmpeg's error
            output is echoed to stderr and attached as ``.stderr``.
    """
    vf = (
        # "increase" scales so both dimensions are >= the target; the crop
        # then trims the overflow, so the frame is filled without distortion.
        f"scale={width}:{height}:force_original_aspect_ratio=increase,"
        f"crop={width}:{height},"
        "fps=24,format=yuv420p"
    )
    cmd = [
        "ffmpeg", "-y",
        # Only real errors go to stderr, so what we capture is the diagnosis
        # rather than the banner and progress chatter.
        "-hide_banner", "-loglevel", "error",
        "-loop", "1",
        "-i", str(image_path),
        "-i", str(audio_path),
        "-t", f"{duration:.3f}",
        "-vf", vf,
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-shortest",
        str(out_path),
    ]
    try:
        subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",  # never fail on odd bytes in ffmpeg's output
        )
    except subprocess.CalledProcessError as exc:
        # Surface why ffmpeg failed; the exception alone only has the status.
        if exc.stderr:
            print(exc.stderr.rstrip(), file=sys.stderr)
        raise


def main() -> int:
    """Parse CLI arguments, build the carrier MP4, and run dispatch_cli on it.

    Flow: validate the inputs and that ffmpeg/ffprobe are on PATH, resolve
    the duration (explicit ``--duration`` or ffprobe of the audio), mux the
    audio onto a blank or still-image carrier video in the temp directory,
    then run ``dispatch_cli.py`` as a subprocess with the carrier passed as
    ``--ref-video``. The carrier file is removed afterwards unless
    ``--keep-bundle`` is given, including when bundling itself fails.

    Returns:
        1 when validation, duration detection, or bundling fails here;
        otherwise the exit status of the dispatch_cli subprocess.
    """
    import shlex  # local import: only used to render the command for display

    parser = argparse.ArgumentParser(
        description="Generate a lip-synced talking-head video via the "
                    "Seedance-R2V blank-video-with-audio hack.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument("--project", required=True,
                        help="Project name (resolves identity ref via "
                             "projects/{project}/...)")
    parser.add_argument("--identity-ref", required=True,
                        help="Asset ID for the identity image (e.g. TORCH, "
                             "JADE). Resolved by dispatch_cli's "
                             "_resolve_client_ref.")
    parser.add_argument("--audio", required=True, type=Path,
                        help="Path to dialogue audio (mp3/wav/m4a/...)")
    parser.add_argument("--prompt", required=True,
                        help="Visual prompt for the shot. Describe what's "
                             "happening visually; the lip sync comes from "
                             "the audio automatically.")
    parser.add_argument("--duration", type=float, default=None,
                        help="Video duration in seconds. Default: detected "
                             "from audio file via ffprobe.")
    parser.add_argument("--aspect-ratio", choices=list(ASPECT_TO_DIMS),
                        default="9:16",
                        help="Aspect ratio for the blank carrier video "
                             "(default: 9:16)")
    parser.add_argument("--resolution", choices=["480p", "720p", "1080p"],
                        default="720p")
    parser.add_argument("--model", default="seeddance-2.0",
                        help="Seedance model id (default: seeddance-2.0)")
    parser.add_argument("--pass-id", default=None,
                        help="Override pass id (default: TALKING_<timestamp>)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Build the blank+audio MP4, print the dispatch "
                             "command, but don't submit.")
    parser.add_argument("--keep-bundle", action="store_true",
                        help="Don't delete the temp blank+audio MP4 after "
                             "dispatch (useful for debugging).")
    parser.add_argument("--carrier-image", type=Path, default=None,
                        help="Use a still image (looped to duration) as the "
                             "carrier video instead of a blank black frame. "
                             "Tests the AI Filmmaking Academy 'Performance "
                             "Anchor' hypothesis: a still face image in the "
                             "temporal channel may give Seedance a per-frame "
                             "face anchor that improves face fidelity, "
                             "lipsync, and audio passthrough vs blank+audio.")
    parser.add_argument("--no-audio", action="store_true",
                        help="Pass --no-audio through to dispatch_cli — "
                             "tells Seedance NOT to generate its own audio, "
                             "preserving the passthrough audio from the "
                             "reference video. Use this for multi-shot or "
                             "anywhere Seedance's generated audio competes "
                             "with the dialogue track.")
    args = parser.parse_args()

    # --- Cheap validation first, before any subprocess is spawned. ---
    if not args.audio.exists():
        print(f"ERROR: --audio not found: {args.audio}", file=sys.stderr)
        return 1
    if not shutil.which("ffmpeg") or not shutil.which("ffprobe"):
        print("ERROR: ffmpeg/ffprobe not on PATH.", file=sys.stderr)
        return 1
    if args.carrier_image is not None and not args.carrier_image.exists():
        print(f"ERROR: --carrier-image not found: {args.carrier_image}", file=sys.stderr)
        return 1

    # --- Resolve duration. ---
    # "is not None" (not truthiness) so an explicit --duration 0 is rejected
    # below instead of silently falling back to ffprobe.
    if args.duration is not None:
        duration = args.duration
    else:
        try:
            duration = _ffprobe_duration(args.audio)
        except (subprocess.CalledProcessError, ValueError) as exc:
            print(f"ERROR: could not determine duration of {args.audio}: {exc}",
                  file=sys.stderr)
            return 1
    # Written as a chained comparison so NaN and infinity are rejected too.
    if not (0 < duration < float("inf")):
        print(f"ERROR: duration must be a positive number of seconds, "
              f"got {duration}", file=sys.stderr)
        return 1

    width, height = ASPECT_TO_DIMS[args.aspect_ratio][args.resolution]
    pass_id = args.pass_id or f"TALKING_{int(time.time())}"

    use_image = args.carrier_image is not None
    bundle_suffix = "image_audio" if use_image else "blank_audio"
    bundle = Path(tempfile.gettempdir()) / f"{pass_id}_{bundle_suffix}.mp4"

    # The finally covers bundling as well as dispatch, so a half-written
    # carrier from a failed ffmpeg run is not left behind in the temp dir.
    try:
        try:
            if use_image:
                print(f"Bundling still-image video + audio: {bundle.name}")
                print(f"  duration={duration:.2f}s  size={width}x{height}  "
                      f"image={args.carrier_image.name}  audio={args.audio.name}")
                _bundle_image_with_audio(
                    args.carrier_image, args.audio, duration, width, height, bundle,
                )
            else:
                print(f"Bundling blank video + audio: {bundle.name}")
                print(f"  duration={duration:.2f}s  size={width}x{height}  "
                      f"audio={args.audio.name}")
                _bundle_blank_with_audio(args.audio, duration, width, height, bundle)
        except subprocess.CalledProcessError as exc:
            print(f"ERROR: ffmpeg failed to build the carrier video "
                  f"(exit status {exc.returncode}).", file=sys.stderr)
            return 1

        cmd = [
            # Same interpreter (and virtualenv) that is running this script.
            sys.executable or "python3", str(DISPATCH_CLI),
            "--project", args.project,
            "--model", args.model,
            "--seedance-refs", args.identity_ref,
            "--ref-video", str(bundle),
            "--prompt", args.prompt,
            # dispatch_cli is given whole seconds; clamp so a clip shorter
            # than half a second does not round down to 0.
            "--duration", str(max(1, int(round(duration)))),
            "--aspect-ratio", args.aspect_ratio,
            "--resolution", args.resolution,
            "--pass-id", pass_id,
        ]
        if args.dry_run:
            cmd.append("--dry-run")
        if args.no_audio:
            cmd.append("--no-audio")

        print()
        print("Dispatching to Seedance R2V:")
        # Shell-quote each argument so the printed line can be copy-pasted
        # even when the prompt contains spaces or quotes.
        print("  " + " ".join(shlex.quote(part) for part in cmd))
        print()
        return subprocess.call(cmd)
    finally:
        if not args.keep_bundle and bundle.exists():
            bundle.unlink()


if __name__ == "__main__":
    sys.exit(main())