@tin2tin
Last active December 9, 2025 15:01
Wan 2.2 i2v under 18 GB VRAM and 32 GB RAM
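# Wan 2.2 image-to-video with NF4 (4-bit) quantized transformers, the Lightx2v
# step-distill LoRA and model CPU offload, aiming to fit in roughly 18 GB VRAM
# and 32 GB system RAM. Assumes a recent diffusers build with Wan 2.2 support,
# plus transformers, accelerate and bitsandbytes, and a CUDA-capable GPU.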
import os
import torch
import gc
from diffusers import WanImageToVideoPipeline, WanTransformer3DModel
from diffusers.utils import export_to_video, load_image
from transformers import BitsAndBytesConfig
from PIL import Image
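# User settings: prompt, negative prompt, input image (URL or local path) and output video path.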
PROMPT = "A cinematic shot of a white cat surfing on a wave, wearing sunglasses, sunny beach background, high quality, 4k"
NEGATIVE_PROMPT = "low quality, bad hands, distorted, blur, motion artifacts"
INPUT_IMAGE_PATH = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
OUTPUT_PATH = "C:/Tmp/wan_result.mp4"
print("--- Initializing ---")
gc.collect()
torch.cuda.empty_cache()
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
print("--- Loading Models (This takes a moment) ---")
transformer_high = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)
transformer_low = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer_2",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer_high,
    transformer_2=transformer_low,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)
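# Move each component to the GPU only while it is in use; the rest stays in system RAM.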
pipe.enable_model_cpu_offload()
print("--- Loading LoRAs ---")
try:
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v"
    )
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v_2",
        load_into_transformer_2=True
    )
    pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
    print("LoRAs loaded successfully.")
except Exception as e:
    print(f"LoRA Load Failed: {e}")
    print("Continuing without LoRA (Standard Speed)...")
print("--- Preparing Image ---")
if INPUT_IMAGE_PATH.startswith("http"):
    input_image = load_image(INPUT_IMAGE_PATH)
else:
    input_image = Image.open(INPUT_IMAGE_PATH).convert("RGB")  # ensure 3-channel RGB for local files
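# Wan expects spatial dimensions that are multiples of 16; scale the longest side
# to at most 832 px and round both sides down accordingly.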
def resize_for_wan(image, max_dim=832):
    w, h = image.size
    scale = max_dim / max(w, h)
    new_w = int(w * scale)
    new_h = int(h * scale)
    new_w = (new_w // 16) * 16
    new_h = (new_h // 16) * 16
    return image.resize((new_w, new_h), Image.LANCZOS)
input_image = resize_for_wan(input_image)
print("--- Generating Video ---")
gc.collect()
torch.cuda.empty_cache()
seed = 42
generator = torch.Generator(device="cuda").manual_seed(seed)
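# 81 frames at 16 fps is roughly 5 seconds of video.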
output_frames = pipe(
    image=input_image,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    height=input_image.height,
    width=input_image.width,
    num_frames=81,
    num_inference_steps=8,  # Needs this step count, otherwise the video crossfades in the middle.
    guidance_scale=1.0,
    guidance_scale_2=1.0,
    generator=generator
).frames[0]
export_to_video(output_frames, OUTPUT_PATH, fps=16)
print(f"DONE! Video saved to: {OUTPUT_PATH}")