import random
import tempfile

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video

# Model identifier handed to the pipeline loader.
MODEL_ID = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"

# Requested height/width are floored to a multiple of this value; it is also
# the step of the resolution sliders.
MOD_VALUE = 32

# Frame rate used both to convert a duration into a frame count and to encode
# the exported video.
FIXED_FPS = 16

# Inclusive bounds applied to the requested number of frames.
MIN_FRAMES_MODEL, MAX_FRAMES_MODEL = 16, 96

# Upper bound for seeds (largest signed 32-bit integer).
MAX_SEED = 2**31 - 1

# Initial contents of the prompt text boxes.
DEFAULT_PROMPT = (
    "A beautiful sunset over the ocean, "
    "cinematic, dynamic motion"
)
DEFAULT_NEGATIVE_PROMPT = ", ".join(
    (
        "blurry",
        "low quality",
        "distorted",
        "ugly",
        "bad anatomy",
        "grainy",
        "low-res",
        "overexposed",
    )
)

# Initial slider values for the output resolution (height, width) in pixels.
DEFAULT_H, DEFAULT_W = 480, 832

# Load the VAE separately in float32 and hand it to the pipeline, following
# the usage shown in the diffusers Wan documentation: decoding latents with a
# reduced-precision VAE can degrade output quality, while the rest of the
# pipeline runs in bfloat16 to save memory.
vae = AutoencoderKLWan.from_pretrained(
    MODEL_ID,
    subfolder="vae",
    torch_dtype=torch.float32,
)
pipe = WanPipeline.from_pretrained(
    MODEL_ID,
    vae=vae,
    torch_dtype=torch.bfloat16,
)

@spaces.GPU(duration=120)
def generate_video(prompt, height, width, negative_prompt, duration_seconds, guidance_scale, steps, seed, randomize_seed):
    """Render a video for ``prompt`` and return its file path and the seed used.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        height: Requested height; floored to a multiple of ``MOD_VALUE``
            (never below ``MOD_VALUE``).
        width: Requested width; floored the same way as ``height``.
        negative_prompt: Negative prompt forwarded to the pipeline.
        duration_seconds: Requested clip length; multiplied by ``FIXED_FPS``
            and clamped to ``[MIN_FRAMES_MODEL, MAX_FRAMES_MODEL]`` frames.
        guidance_scale: Guidance scale, converted to ``float``.
        steps: Number of inference steps, converted to ``int``.
        seed: Seed used when ``randomize_seed`` is falsy.
        randomize_seed: When truthy, a random seed in ``[0, MAX_SEED]`` is
            drawn instead of using ``seed``.

    Returns:
        A ``(video_path, current_seed)`` tuple: the path of the exported
        ``.mp4`` file and the seed that was actually used.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Validate/convert every input before touching the model so that a bad
    # value fails fast without moving weights around.
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
    requested_frames = int(round(duration_seconds * FIXED_FPS))
    # Clamp with builtins so the pipeline receives a plain Python int rather
    # than a NumPy scalar.
    num_frames = min(max(requested_frames, MIN_FRAMES_MODEL), MAX_FRAMES_MODEL)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    pipe.to(device)
    try:
        with torch.inference_mode():
            output_frames_list = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                height=target_h,
                width=target_w,
                num_frames=num_frames,
                guidance_scale=float(guidance_scale),
                num_inference_steps=int(steps),
                generator=torch.Generator(device=device).manual_seed(current_seed),
            ).frames[0]
    finally:
        # Always move the weights back and release cached GPU memory, even
        # when inference fails, so a failed request does not leave the
        # pipeline on the GPU.
        pipe.to("cpu")
        if device == "cuda":
            torch.cuda.empty_cache()

    # Only reserve a unique file name here; the handle is closed before the
    # exporter writes to that path, which avoids writing to a file that is
    # still open elsewhere. delete=False keeps the file for the caller.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)

    return video_path, current_seed

# Gradio interface: controls in the left column, generated video and the
# trigger button in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# Wan 2.1 Text-to-Video (1.3B)")
    gr.Markdown("Generate videos from text prompts using Wan2.1 1.3B model")

    with gr.Row():
        # --- Input controls -------------------------------------------------
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
                value=DEFAULT_PROMPT,
                lines=3,
            )
            duration_seconds = gr.Slider(
                label="Duration (seconds)",
                minimum=1.0,
                maximum=5.0,
                step=0.5,
                value=2.0,
            )

            # Less frequently changed settings are collapsed by default.
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value=DEFAULT_NEGATIVE_PROMPT,
                    lines=2,
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42,
                )
                randomize_seed = gr.Checkbox(
                    label="Randomize seed",
                    value=True,
                )
                steps = gr.Slider(
                    label="Inference Steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=20,
                )
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.5,
                    value=5.0,
                )
                with gr.Row():
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=720,
                        step=MOD_VALUE,
                        value=DEFAULT_H,
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=1280,
                        step=MOD_VALUE,
                        value=DEFAULT_W,
                    )

        # --- Output ---------------------------------------------------------
        with gr.Column():
            video_output = gr.Video(
                label="Generated Video",
                autoplay=True,
            )
            generate_button = gr.Button(
                "Generate Video",
                variant="primary",
            )

    # The input order must match the parameter order of generate_video; the
    # seed slider is also an output so it shows the seed that was used.
    generate_button.click(
        fn=generate_video,
        inputs=[
            prompt,
            height,
            width,
            negative_prompt,
            duration_seconds,
            guidance_scale,
            steps,
            seed,
            randomize_seed,
        ],
        outputs=[
            video_output,
            seed,
        ],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # Enable the request queue first, then launch the server.
    demo.queue()
    demo.launch()