@tin2tin
Created December 9, 2025 19:40
Wan 2.2 i2v under 18 GB VRAM and 32 GB RAM
import os
import torch
import gc
# Use diffusers' own BitsAndBytesConfig for quantizing diffusers model classes
from diffusers import WanPipeline, WanTransformer3DModel, FlowMatchEulerDiscreteScheduler, BitsAndBytesConfig
from diffusers.utils import export_to_video
# ==========================================
# 1. USER SETTINGS
# ==========================================
PROMPT = "A cinematic close-up of a cat wearing a black fedora hat, looking around, subtle movement, high quality, 4k, indoors"
NEGATIVE_PROMPT = "low quality, bad hands, distorted, blur, motion artifacts, color banding"
OUTPUT_PATH = "C:/Tmp/wan_t2v_turbo_result.mp4"
# Resolution: 832x480 is standard.
# You can try 1280x720, but it might OOM on 24GB even with 4-bit.
WIDTH = 832
HEIGHT = 480
NUM_FRAMES = 81
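# Sanity check (assumption, not part of the original script): Wan's VAE compresses
# time by 4x, so frame counts of the form 4k + 1 (17, 49, 81, ...) are the safe choice.
assert (NUM_FRAMES - 1) % 4 == 0, "NUM_FRAMES should be 4k + 1, e.g. 81"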
# ==========================================
# 2. SETUP & CLEANUP
# ==========================================
print("--- Initializing ---")
gc.collect()
torch.cuda.empty_cache()
MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
# 4-Bit Configuration (Mandatory for 24GB VRAM)
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
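# Optional, untested sketch: the UMT5 text encoder could also be loaded in 4-bit
# (with transformers' own BitsAndBytesConfig) and passed as text_encoder= to
# WanPipeline.from_pretrained below to save a few more GB. Left commented out.
# from transformers import UMT5EncoderModel, BitsAndBytesConfig as TransformersBnbConfig
# text_encoder = UMT5EncoderModel.from_pretrained(
#     MODEL_ID,
#     subfolder="text_encoder",
#     quantization_config=TransformersBnbConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_compute_dtype=torch.bfloat16,
#     ),
#     torch_dtype=torch.bfloat16,
# )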
# ==========================================
# 3. LOAD MODELS (4-BIT QUANTIZED)
# ==========================================
print("--- Loading Models ---")
# Load Transformer 1 (High Noise)
transformer_high = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
# Load Transformer 2 (Low Noise)
transformer_low = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer_2",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
# Create Pipeline (WanPipeline for Text-to-Video)
pipe = WanPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer_high,
    transformer_2=transformer_low,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
# ==========================================
# 4. MEMORY & SCHEDULER SETUP
# ==========================================
print("--- Configuring Optimization ---")
# 1. CPU Offload (Saves VRAM)
pipe.enable_model_cpu_offload()
# 2. Fix "Ghosting/Overlay" Artifacts
# We enable slicing (frame-by-frame decode) and DISABLE tiling.
pipe.vae.enable_slicing()
pipe.vae.disable_tiling()
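# Fallback (assumption): if decoding at 1280x720 still OOMs, tiling can be re-enabled
# at the risk of visible seams between tiles.
# pipe.vae.enable_tiling()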
# 3. Set Scheduler to Euler + Shift 5.0 (Required for Lightx2v LoRA)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    shift=5.0,
    use_dynamic_shifting=False,
)
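# Quick check (assumption): confirm the shift was applied to the new scheduler config.
print(f"Scheduler: {pipe.scheduler.__class__.__name__}, shift={pipe.scheduler.config.shift}")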
# ==========================================
# 5. LOAD T2V TURBO LORA (LIGHTX2V)
# ==========================================
print("--- Loading T2V Turbo LoRA ---")
try:
    # LoRA filename inside the Kijai/WanVideo_comfy repo
    LORA_FILENAME = "Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank128_bf16.safetensors"
    # Load into the High Noise transformer
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name=LORA_FILENAME,
        adapter_name="lightx2v",
    )
    # Load into the Low Noise transformer
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name=LORA_FILENAME,
        adapter_name="lightx2v_2",
        load_into_transformer_2=True,
    )
    pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
    print("T2V LoRA loaded successfully.")
except Exception as e:
    print(f"LoRA load failed: {e}")
    print("STOPPING: this script is tuned for the LoRA; running without it will likely produce bad results at this low step count.")
    raise
# ==========================================
# 6. GENERATE
# ==========================================
print("--- Generating Video ---")
gc.collect()
torch.cuda.empty_cache()
seed = 42
generator = torch.Generator(device="cuda").manual_seed(seed)
output_frames = pipe(
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    height=HEIGHT,
    width=WIDTH,
    num_frames=NUM_FRAMES,
    # 8 steps is the "safe" spot for this LoRA to avoid artifacts
    num_inference_steps=8,
    # Distilled models usually need guidance_scale = 1.0 (or very low, e.g. 1.5)
    guidance_scale=1.0,
    generator=generator,
).frames[0]
# ==========================================
# 7. SAVE RESULT
# ==========================================
export_to_video(output_frames, OUTPUT_PATH, fps=16)
print(f"DONE! Video saved to: {OUTPUT_PATH}")