# PyTorch 2.8 (temporary hack)
import os
os.system('pip install --upgrade --pre --extra-index-url https://download.pytorch.org/whl/nightly/cu126 "torch<2.9" spaces')
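# The line above pulls a cu126 nightly wheel of torch (<2.9) together with the latest
# `spaces` package at startup; presumably a stopgap until the Space's base image ships
# a compatible PyTorch 2.8 build.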
# Actual demo code
import spaces
import torch
from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
from diffusers.utils import export_to_video, load_image
from transformers import CLIPVisionModel
import gradio as gr
import tempfile
from huggingface_hub import hf_hub_download, login
import numpy as np
from PIL import Image
import random
from datetime import datetime
import time
import json
import boto3
from io import BytesIO
import gc
from optimization import optimize_pipeline_
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
MOD_VALUE = 32  # output height/width must be multiples of this
DEFAULT_H_SLIDER_VALUE = 512
DEFAULT_W_SLIDER_VALUE = 896
NEW_FORMULA_MAX_AREA = 480.0 * 832.0  # target pixel budget when deriving dimensions from an input image
SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 24
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 81
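# At FIXED_FPS = 24, the 8-81 frame range above corresponds to clips of roughly 0.3 to 3.4 seconds.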
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=WanTransformer3DModel.from_pretrained(
        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
        subfolder='transformer',
        torch_dtype=torch.bfloat16,
        device_map='cuda',
    ),
    transformer_2=WanTransformer3DModel.from_pretrained(
        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
        subfolder='transformer_2',
        torch_dtype=torch.bfloat16,
        device_map='cuda',
    ),
    torch_dtype=torch.bfloat16,
).to('cuda')
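# Aggressively reclaim host and CUDA memory freed while the checkpoints were loaded,
# before the memory-hungry warm-up step below.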
for i in range(3):
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
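# optimize_pipeline_ comes from the Space-local optimization module; presumably it warms up /
# compiles the pipeline once at the largest supported shape so later calls reuse the cached graph.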
optimize_pipeline_(pipe,
    image=Image.new('RGB', (DEFAULT_W_SLIDER_VALUE, DEFAULT_H_SLIDER_VALUE)),
    prompt='prompt',
    height=DEFAULT_H_SLIDER_VALUE,
    width=DEFAULT_W_SLIDER_VALUE,
    num_frames=MAX_FRAMES_MODEL,
)
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
class calculateDuration:
    def __init__(self, activity_name=""):
        self.activity_name = activity_name

    def __enter__(self):
        self.start_time = time.time()
        self.start_time_formatted = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.start_time))
        print(f"Activity: {self.activity_name}, Start time: {self.start_time_formatted}")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end_time = time.time()
        self.elapsed_time = self.end_time - self.start_time
        self.end_time_formatted = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.end_time))
        if self.activity_name:
            print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
        else:
            print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
                                  min_slider_h, max_slider_h,
                                  min_slider_w, max_slider_w,
                                  default_h, default_w):
    orig_w, orig_h = pil_image.size
    if orig_w <= 0 or orig_h <= 0:
        return default_h, default_w
    aspect_ratio = orig_h / orig_w
    calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
    calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
    calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
    calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
    new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
    new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
    return new_h, new_w
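# Worked example with the module defaults (mod_val=32, max_area=480*832): a 1280x720 input has
# aspect_ratio 0.5625, giving calc_h ~474 and calc_w ~843, which floor to 448x832 and stay inside
# the 128-896 slider range, so the function returns (448, 832). Note this helper is currently
# unused; generate_video takes height and width directly from the sliders.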
def upload_video_to_r2(video_file, account_id, access_key, secret_key, bucket_name):
    with calculateDuration("Upload video"):
        connectionUrl = f"https://{account_id}.r2.cloudflarestorage.com"
        s3 = boto3.client(
            's3',
            endpoint_url=connectionUrl,
            region_name='auto',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key
        )
        current_time = datetime.now().strftime("%Y/%m/%d/%H%M%S")
        video_remote_path = f"generated_images/{current_time}_{random.randint(0, MAX_SEED)}.mp4"
        with open(video_file, "rb") as f:  # key fix: upload from a binary file handle
            s3.upload_fileobj(f, bucket_name, video_remote_path)
        print("upload finish", video_remote_path)
        return video_remote_path
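# Note: the value returned above is the R2 object key (e.g. "generated_images/<timestamp>_<rand>.mp4"),
# not a public URL; generate_video passes it through unchanged in the JSON "url" field.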
@spaces.GPU(duration=120)  # assumed: attach a ZeroGPU device for each generation call; the imported `spaces` package is otherwise unused
def generate_video(image_url,
                   prompt,
                   height,
                   width,
                   negative_prompt,
                   duration_seconds,
                   guidance_scale,
                   steps,
                   seed,
                   randomize_seed,
                   upload_to_r2,
                   account_id,
                   access_key,
                   secret_key,
                   bucket,
                   progress=gr.Progress(track_tqdm=True)):
    if image_url is None:
        raise gr.Error("Please provide an input image URL.")
    input_image = load_image(image_url)
    # Snap the requested resolution to the model's stride and clamp the frame count.
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    resized_image = input_image.resize((target_w, target_h))
    with torch.inference_mode():
        output_frames_list = pipe(
            image=resized_image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=target_h,
            width=target_w,
            num_frames=num_frames,
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(steps),
            generator=torch.Generator(device="cuda").manual_seed(current_seed)
        ).frames[0]
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
    if upload_to_r2:
        video_url = upload_video_to_r2(video_path, account_id, access_key, secret_key, bucket)
        result = {"status": "success", "message": "video uploaded successfully", "url": video_url}
    else:
        result = {"status": "success", "message": "video generated but not uploaded", "url": video_path}
    return json.dumps(result)
with gr.Blocks() as demo:
    gr.Markdown("# Wan2.2-I2V-A14B-Diffusers")
    with gr.Row():
        with gr.Column():
            image_url_input = gr.Textbox(
                label="Original image URL",
                show_label=True,
                max_lines=1,
                placeholder="Enter the URL of the image to animate",
                container=False
            )
            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
            duration_seconds_input = gr.Slider(minimum=round(MIN_FRAMES_MODEL/FIXED_FPS, 1), maximum=round(MAX_FRAMES_MODEL/FIXED_FPS, 1), step=0.1, value=2, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
                with gr.Row():
                    height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
                    width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=20.0, step=0.5, value=1.0, label="Guidance Scale", visible=True)
            with gr.Accordion("R2 Settings", open=False):
                upload_to_r2 = gr.Checkbox(label="Upload to R2", value=False)
                with gr.Row():
                    account_id = gr.Textbox(label="Account Id", placeholder="Enter R2 account id", value="")
                    bucket = gr.Textbox(label="Bucket Name", placeholder="Enter R2 bucket name here", value="")
                with gr.Row():
                    access_key = gr.Textbox(label="Access Key", placeholder="Enter R2 access key here", value="")
                    secret_key = gr.Textbox(label="Secret Key", placeholder="Enter R2 secret key here", value="")
            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column():
            output_json_component = gr.Code(label="JSON Result", language="json", value="{}")
    ui_inputs = [
        image_url_input, prompt_input, height_input, width_input,
        negative_prompt_input, duration_seconds_input,
        guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox,
        upload_to_r2, account_id, access_key, secret_key, bucket
    ]
    generate_button.click(
        fn=generate_video,
        inputs=ui_inputs,
        outputs=output_json_component,
        api_name="predict"
    )

if __name__ == "__main__":
    demo.queue(api_open=True)
    demo.launch(share=True)
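# Example client call (a minimal sketch; assumes the `gradio_client` package and that
# "<user>/<space-name>" points at this Space; run it from a separate process):
#
#   from gradio_client import Client
#
#   client = Client("<user>/<space-name>")
#   result_json = client.predict(
#       "https://example.com/input.jpg",   # image_url
#       "make this image come alive, cinematic motion, smooth animation",  # prompt
#       512,                               # height
#       896,                               # width
#       "",                                # negative_prompt
#       2.0,                               # duration_seconds
#       1.0,                               # guidance_scale
#       4,                                 # steps
#       42,                                # seed
#       True,                              # randomize_seed
#       False, "", "", "", "",             # R2 settings (upload disabled)
#       api_name="/predict",
#   )
#   print(result_json)                     # JSON string with status / message / url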