"""Gradio demo that generates short videos from text with Wan 2.1 (1.3B)."""

import random
import tempfile

import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import WanPipeline
from diffusers.utils import export_to_video

MODEL_ID = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"

# Height and width are snapped down to a multiple of this value.
MOD_VALUE = 32
# Frame rate used both to derive the frame count and to encode the video.
FIXED_FPS = 16
# Bounds applied to the requested number of frames.
MIN_FRAMES_MODEL = 16
MAX_FRAMES_MODEL = 96
MAX_SEED = 2147483647

DEFAULT_PROMPT = "A beautiful sunset over the ocean, cinematic, dynamic motion"
DEFAULT_NEGATIVE_PROMPT = "blurry, low quality, distorted, ugly, bad anatomy, grainy, low-res, overexposed"
DEFAULT_H = 480
DEFAULT_W = 832

# Loaded once at import time; moved to the compute device per request.
pipe = WanPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)


def _snap_to_multiple(value, multiple: int = MOD_VALUE) -> int:
    """Round *value* down to a multiple of *multiple*, never below *multiple*."""
    return max(multiple, (int(value) // multiple) * multiple)


def _frames_for_duration(duration_seconds) -> int:
    """Convert a duration in seconds to a frame count clamped to the model bounds."""
    requested = int(round(duration_seconds * FIXED_FPS))
    # np.clip returns a NumPy scalar; hand the pipeline a plain Python int.
    return int(np.clip(requested, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))


@spaces.GPU(duration=120)
def generate_video(prompt, height, width, negative_prompt, duration_seconds,
                   guidance_scale, steps, seed, randomize_seed):
    """Generate a video from a text prompt and return its path and the seed used.

    Args:
        prompt: Text description of the desired video.
        height: Requested height; snapped down to a multiple of MOD_VALUE.
        width: Requested width; snapped down to a multiple of MOD_VALUE.
        negative_prompt: Text describing what the video should avoid.
        duration_seconds: Requested length; converted to frames at FIXED_FPS
            and clamped to [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL].
        guidance_scale: Guidance scale forwarded to the pipeline as a float.
        steps: Number of inference steps forwarded to the pipeline as an int.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: When true, draw a fresh seed in [0, MAX_SEED].

    Returns:
        A ``(video_path, seed)`` tuple: the path of the written ``.mp4`` file
        and the seed that was actually used.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    target_h = _snap_to_multiple(height)
    target_w = _snap_to_multiple(width)
    # NOTE(review): WanPipeline may round frame counts that are not of the
    # form 4k + 1 - confirm against the diffusers documentation.
    num_frames = _frames_for_duration(duration_seconds)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    try:
        pipe.to(device)
        generator = torch.Generator(device=device).manual_seed(current_seed)
        with torch.inference_mode():
            output_frames_list = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                height=target_h,
                width=target_w,
                num_frames=num_frames,
                guidance_scale=float(guidance_scale),
                num_inference_steps=int(steps),
                generator=generator,
            ).frames[0]
    finally:
        # Always release the accelerator, even when inference fails, so a
        # failed request does not leave the model resident on the GPU.
        pipe.to("cpu")
        if device == "cuda":
            torch.cuda.empty_cache()

    # Reserve a path that outlives this function; the caller serves the file.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    # Write after the handle is closed so the exporter can reopen the path.
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)

    return video_path, current_seed


with gr.Blocks() as demo:
    gr.Markdown("# Wan 2.1 Text-to-Video (1.3B)")
    gr.Markdown("Generate videos from text prompts using Wan2.1 1.3B model")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT, lines=3)
            duration_seconds = gr.Slider(
                minimum=1.0, maximum=5.0, step=0.5, value=2.0,
                label="Duration (seconds)",
            )

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value=DEFAULT_NEGATIVE_PROMPT,
                    lines=2,
                )
                seed = gr.Slider(
                    label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                steps = gr.Slider(
                    minimum=1, maximum=50, step=1, value=20,
                    label="Inference Steps",
                )
                guidance_scale = gr.Slider(
                    minimum=1.0, maximum=10.0, step=0.5, value=5.0,
                    label="Guidance Scale",
                )
                with gr.Row():
                    height = gr.Slider(
                        minimum=256, maximum=720, step=MOD_VALUE,
                        value=DEFAULT_H, label="Height",
                    )
                    width = gr.Slider(
                        minimum=256, maximum=1280, step=MOD_VALUE,
                        value=DEFAULT_W, label="Width",
                    )

        with gr.Column():
            video_output = gr.Video(label="Generated Video", autoplay=True)
            generate_button = gr.Button("Generate Video", variant="primary")

    # The seed slider is also an output so the UI shows the seed actually used.
    generate_button.click(
        fn=generate_video,
        inputs=[
            prompt, height, width, negative_prompt, duration_seconds,
            guidance_scale, steps, seed, randomize_seed,
        ],
        outputs=[video_output, seed],
    )

if __name__ == "__main__":
    demo.queue().launch()