@tin2tin
Created December 9, 2025 19:40
Wan 2.2 i2v under 18 GB VRAM and 32 GB RAM
import os
import torch
import gc
# Use diffusers' own BitsAndBytesConfig for quantizing diffusers model classes
from diffusers import WanPipeline, WanTransformer3DModel, FlowMatchEulerDiscreteScheduler, BitsAndBytesConfig
from diffusers.utils import export_to_video
# ==========================================
# 1. USER SETTINGS
# ==========================================
PROMPT = "A cinematic close-up of a cat wearing a black fedora hat, looking around, subtle movement, high quality, 4k, indoors"
NEGATIVE_PROMPT = "low quality, bad hands, distorted, blur, motion artifacts, color banding"
OUTPUT_PATH = "C:/Tmp/wan_t2v_turbo_result.mp4"
# Resolution: 832x480 is standard.
# You can try 1280x720, but it might OOM on 24GB even with 4-bit.
WIDTH = 832
HEIGHT = 480
NUM_FRAMES = 81
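# Sanity check (assumption, not part of the original script): Wan's VAE compresses
# time by 4x, so frame counts of the form 4k + 1 (17, 49, 81, ...) are the safe choice.
assert (NUM_FRAMES - 1) % 4 == 0, "NUM_FRAMES should be 4k + 1, e.g. 81"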
# ==========================================
# 2. SETUP & CLEANUP
# ==========================================
print("--- Initializing ---")
gc.collect()
torch.cuda.empty_cache()
MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
# 4-Bit Configuration (Mandatory for 24GB VRAM)
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
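# Optional, untested sketch: the UMT5 text encoder could also be loaded in 4-bit
# (with transformers' own BitsAndBytesConfig) and passed as text_encoder= to
# WanPipeline.from_pretrained below to save a few more GB. Left commented out.
# from transformers import UMT5EncoderModel, BitsAndBytesConfig as TransformersBnbConfig
# text_encoder = UMT5EncoderModel.from_pretrained(
#     MODEL_ID,
#     subfolder="text_encoder",
#     quantization_config=TransformersBnbConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_compute_dtype=torch.bfloat16,
#     ),
#     torch_dtype=torch.bfloat16,
# )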
# ==========================================
# 3. LOAD MODELS (4-BIT QUANTIZED)
# ==========================================
print("--- Loading Models ---")
# Load Transformer 1 (High Noise)
transformer_high = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
# Load Transformer 2 (Low Noise)
transformer_low = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer_2",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
# Create Pipeline (WanPipeline for Text-to-Video)
pipe = WanPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer_high,
    transformer_2=transformer_low,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
# ==========================================
# 4. MEMORY & SCHEDULER SETUP
# ==========================================
print("--- Configuring Optimization ---")
# 1. CPU Offload (Saves VRAM)
pipe.enable_model_cpu_offload()
# 2. Fix "Ghosting/Overlay" Artifacts
# We enable slicing (frame-by-frame decode) and DISABLE tiling.
pipe.vae.enable_slicing()
pipe.vae.disable_tiling()
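# Fallback (assumption): if decoding at 1280x720 still OOMs, tiling can be re-enabled
# at the risk of visible seams between tiles.
# pipe.vae.enable_tiling()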
# 3. Set Scheduler to Euler + Shift 5.0 (Required for Lightx2v LoRA)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    shift=5.0,
    use_dynamic_shifting=False,
)
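# Quick check (assumption): confirm the shift was applied to the new scheduler config.
print(f"Scheduler: {pipe.scheduler.__class__.__name__}, shift={pipe.scheduler.config.shift}")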
# ==========================================
# 5. LOAD T2V TURBO LORA (LIGHTX2V)
# ==========================================
print("--- Loading T2V Turbo LoRA ---")
try:
    # LoRA filename inside the Kijai/WanVideo_comfy repo
    LORA_FILENAME = "Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank128_bf16.safetensors"
    # Load into the High Noise transformer
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name=LORA_FILENAME,
        adapter_name="lightx2v",
    )
    # Load into the Low Noise transformer
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name=LORA_FILENAME,
        adapter_name="lightx2v_2",
        load_into_transformer_2=True,
    )
    pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
    print("T2V LoRA loaded successfully.")
except Exception as e:
    print(f"LoRA load failed: {e}")
    print("STOPPING: this script is tuned for the LoRA; running without it will likely produce bad results at this low step count.")
    raise
# ==========================================
# 6. GENERATE
# ==========================================
print("--- Generating Video ---")
gc.collect()
torch.cuda.empty_cache()
seed = 42
generator = torch.Generator(device="cuda").manual_seed(seed)
output_frames = pipe(
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    height=HEIGHT,
    width=WIDTH,
    num_frames=NUM_FRAMES,
    # 8 steps is the "safe" spot for this LoRA to avoid artifacts
    num_inference_steps=8,
    # Distilled models usually need guidance_scale = 1.0 (or very low, e.g. 1.5)
    guidance_scale=1.0,
    generator=generator,
).frames[0]
# ==========================================
# 7. SAVE RESULT
# ==========================================
export_to_video(output_frames, OUTPUT_PATH, fps=16)
print(f"DONE! Video saved to: {OUTPUT_PATH}")