tin2tin · December 9, 2025 16:19
diff --git a/WAN_2.2_TI2V_5B.py b/WAN_2.2_TI2V_5B.py
 import os
 import torch
 import gc
 from diffusers import WanPipeline, WanTransformer3DModel
 from diffusers.utils import export_to_video
 from transformers import BitsAndBytesConfig

 # ==========================================
 # 1. SETTINGS
 # ==========================================
 # Prompts: what the clip should show, and what the sampler is steered away from.
 PROMPT = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
 # The negative prompt is a list of Chinese quality/artifact terms (overexposure,
 # blur, subtitles, low quality, malformed hands and limbs, cluttered background, ...).
 NEGATIVE_PROMPT = "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"

 # Frame size. 960x544 is the lighter setting and the one in use.
 # WIDTH, HEIGHT = 1280, 704 is the heavy alternative; fall back to 960x544
 # if it runs out of memory.
 WIDTH, HEIGHT = 960, 544

 # Sampling parameters: clip length in frames, denoising steps, guidance scale.
 NUM_FRAMES, STEPS, GUIDANCE = 121, 50, 5.0

 # Destination of the exported video.
 OUTPUT_FILENAME = "C:/Tmp/5b_t2v_output.mp4"

 # ==========================================
 # 2. OPTIMIZED SETUP
 # ==========================================
 print("--- Initializing & Clearing Memory ---")
 # Collect unreachable Python objects first, then release PyTorch's unused
 # cached CUDA memory.
 gc.collect()
 torch.cuda.empty_cache()

 MODEL_ID = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"

 # One dtype shared by the 4-bit compute path, the transformer and the rest of
 # the pipeline, so the three settings cannot drift apart.
 COMPUTE_DTYPE = torch.bfloat16

 # 4-bit NF4 quantization for the transformer weights (the original note
 # estimates ~3.5GB of VRAM for the 5B model).
 # NOTE(review): diffusers documents its own `diffusers.BitsAndBytesConfig` for
 # diffusers models, while this script imports the class from transformers --
 # confirm the installed diffusers version accepts it.
 nf4_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
     bnb_4bit_compute_dtype=COMPUTE_DTYPE,
 )

 # ==========================================
 # 3. LOAD MODELS
 # ==========================================
 print("--- Loading Models (4-bit Quantized) ---")

 # The transformer is loaded on its own so that the quantization config
 # applies to it alone.
 transformer = WanTransformer3DModel.from_pretrained(
     MODEL_ID,
     subfolder="transformer",
     quantization_config=nf4_config,
     torch_dtype=COMPUTE_DTYPE,
 )

 # The pipeline loads the remaining components (VAE, T5 text encoder)
 # automatically and reuses the quantized transformer passed in here.
 # T5 is large (~10GB per the original note), so it relies on the CPU
 # offload enabled below.
 pipe = WanPipeline.from_pretrained(
     MODEL_ID,
     transformer=transformer,
     torch_dtype=COMPUTE_DTYPE,
 )

 # ==========================================
 # 4. MEMORY OPTIMIZATIONS (CRITICAL)
 # ==========================================
 # Keep sub-models on the CPU and move each one to the GPU only while it runs.
 # Sequence: Text Encoder (GPU) -> Transformer (GPU) -> VAE (GPU)
 pipe.enable_model_cpu_offload()

 # VAE slicing splits the decode along the *batch* dimension (one video per
 # pass), not along frames. With the single prompt used here it is harmless
 # but saves little or nothing; tiling (below) is the option that lowers
 # the memory needed for a decode.
 pipe.vae.enable_slicing()

 # Only enable tiling if you still OOM (it creates grid artifacts sometimes)
 # pipe.vae.enable_tiling()

 # ==========================================
 # 5. GENERATE
 # ==========================================
 # Make sure the destination folder exists *before* the long generation run;
 # otherwise a missing directory only fails at export time, after all the
 # sampling steps have run, and the result is lost.
 os.makedirs(os.path.dirname(OUTPUT_FILENAME) or ".", exist_ok=True)

 print(f"--- Generating {NUM_FRAMES} frames at {WIDTH}x{HEIGHT} ---")
 torch.cuda.empty_cache()

 # Optional: fixed seed for reproducibility; change or drop it for variation.
 generator = torch.Generator(device="cuda").manual_seed(42)

 # `.frames[0]` selects the first generated video (one prompt is submitted).
 output = pipe(
     prompt=PROMPT,
     negative_prompt=NEGATIVE_PROMPT,
     height=HEIGHT,
     width=WIDTH,
     num_frames=NUM_FRAMES,
     guidance_scale=GUIDANCE,
     num_inference_steps=STEPS,
     generator=generator,
 ).frames[0]

 # ==========================================
 # 6. SAVE
 # ==========================================
 print("--- Saving Video ---")
 export_to_video(output, OUTPUT_FILENAME, fps=24)
 print(f"Done! Saved to {OUTPUT_FILENAME}")
	import os
	import torch
	import gc
	from diffusers import WanPipeline, WanTransformer3DModel
	from diffusers.utils import export_to_video
	from transformers import BitsAndBytesConfig

	# ==========================================
	# 1. SETTINGS
	# ==========================================
	# Prompts: what the clip should show, and what the sampler is steered away from.
	PROMPT = "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
	# The negative prompt is a list of Chinese quality/artifact terms (overexposure,
	# blur, subtitles, low quality, malformed hands and limbs, cluttered background, ...).
	NEGATIVE_PROMPT = "色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"

	# Frame size. 960x544 is the lighter setting and the one in use.
	# WIDTH, HEIGHT = 1280, 704 is the heavy alternative; fall back to 960x544
	# if it runs out of memory.
	WIDTH, HEIGHT = 960, 544

	# Sampling parameters: clip length in frames, denoising steps, guidance scale.
	NUM_FRAMES, STEPS, GUIDANCE = 121, 50, 5.0

	# Destination of the exported video.
	OUTPUT_FILENAME = "C:/Tmp/5b_t2v_output.mp4"

	# ==========================================
	# 2. OPTIMIZED SETUP
	# ==========================================
	print("--- Initializing & Clearing Memory ---")
	# Collect unreachable Python objects first, then release PyTorch's unused
	# cached CUDA memory.
	gc.collect()
	torch.cuda.empty_cache()

	MODEL_ID = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"

	# One dtype shared by the 4-bit compute path, the transformer and the rest of
	# the pipeline, so the three settings cannot drift apart.
	COMPUTE_DTYPE = torch.bfloat16

	# 4-bit NF4 quantization for the transformer weights (the original note
	# estimates ~3.5GB of VRAM for the 5B model).
	# NOTE(review): diffusers documents its own `diffusers.BitsAndBytesConfig` for
	# diffusers models, while this script imports the class from transformers --
	# confirm the installed diffusers version accepts it.
	nf4_config = BitsAndBytesConfig(
	    load_in_4bit=True,
	    bnb_4bit_quant_type="nf4",
	    bnb_4bit_compute_dtype=COMPUTE_DTYPE,
	)

	# ==========================================
	# 3. LOAD MODELS
	# ==========================================
	print("--- Loading Models (4-bit Quantized) ---")

	# The transformer is loaded on its own so that the quantization config
	# applies to it alone.
	transformer = WanTransformer3DModel.from_pretrained(
	    MODEL_ID,
	    subfolder="transformer",
	    quantization_config=nf4_config,
	    torch_dtype=COMPUTE_DTYPE,
	)

	# The pipeline loads the remaining components (VAE, T5 text encoder)
	# automatically and reuses the quantized transformer passed in here.
	# T5 is large (~10GB per the original note), so it relies on the CPU
	# offload enabled below.
	pipe = WanPipeline.from_pretrained(
	    MODEL_ID,
	    transformer=transformer,
	    torch_dtype=COMPUTE_DTYPE,
	)

	# ==========================================
	# 4. MEMORY OPTIMIZATIONS (CRITICAL)
	# ==========================================
	# Keep sub-models on the CPU and move each one to the GPU only while it runs.
	# Sequence: Text Encoder (GPU) -> Transformer (GPU) -> VAE (GPU)
	pipe.enable_model_cpu_offload()

	# VAE slicing splits the decode along the *batch* dimension (one video per
	# pass), not along frames. With the single prompt used here it is harmless
	# but saves little or nothing; tiling (below) is the option that lowers
	# the memory needed for a decode.
	pipe.vae.enable_slicing()

	# Only enable tiling if you still OOM (it creates grid artifacts sometimes)
	# pipe.vae.enable_tiling()

	# ==========================================
	# 5. GENERATE
	# ==========================================
	# Make sure the destination folder exists *before* the long generation run;
	# otherwise a missing directory only fails at export time, after all the
	# sampling steps have run, and the result is lost.
	os.makedirs(os.path.dirname(OUTPUT_FILENAME) or ".", exist_ok=True)

	print(f"--- Generating {NUM_FRAMES} frames at {WIDTH}x{HEIGHT} ---")
	torch.cuda.empty_cache()

	# Optional: fixed seed for reproducibility; change or drop it for variation.
	generator = torch.Generator(device="cuda").manual_seed(42)

	# `.frames[0]` selects the first generated video (one prompt is submitted).
	output = pipe(
	    prompt=PROMPT,
	    negative_prompt=NEGATIVE_PROMPT,
	    height=HEIGHT,
	    width=WIDTH,
	    num_frames=NUM_FRAMES,
	    guidance_scale=GUIDANCE,
	    num_inference_steps=STEPS,
	    generator=generator,
	).frames[0]

	# ==========================================
	# 6. SAVE
	# ==========================================
	print("--- Saving Video ---")
	export_to_video(output, OUTPUT_FILENAME, fps=24)
	print(f"Done! Saved to {OUTPUT_FILENAME}")
No results found