@tin2tin
Last active December 9, 2025 15:01
Wan 2.2 i2v under 18 GB VRAM and 32 GB RAM
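# Wan 2.2 image-to-video with NF4 (4-bit) quantized transformers, the Lightx2v
# step-distill LoRA and model CPU offload, aiming to fit in roughly 18 GB VRAM
# and 32 GB system RAM. Assumes a recent diffusers build with Wan 2.2 support,
# plus transformers, accelerate and bitsandbytes, and a CUDA-capable GPU.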
import os
import torch
import gc
from diffusers import WanImageToVideoPipeline, WanTransformer3DModel
from diffusers.utils import export_to_video, load_image
from transformers import BitsAndBytesConfig
from PIL import Image
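# User settings: prompt, negative prompt, input image (URL or local path) and output video path.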
PROMPT = "A cinematic shot of a white cat surfing on a wave, wearing sunglasses, sunny beach background, high quality, 4k"
NEGATIVE_PROMPT = "low quality, bad hands, distorted, blur, motion artifacts"
INPUT_IMAGE_PATH = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
OUTPUT_PATH = "C:/Tmp/wan_result.mp4"
print("--- Initializing ---")
gc.collect()
torch.cuda.empty_cache()
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
print("--- Loading Models (This takes a moment) ---")
transformer_high = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)
transformer_low = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer_2",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer_high,
    transformer_2=transformer_low,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)
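# Move each component to the GPU only while it is in use; the rest stays in system RAM.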
pipe.enable_model_cpu_offload()
print("--- Loading LoRAs ---")
try:
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v"
    )
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
        adapter_name="lightx2v_2",
        load_into_transformer_2=True
    )
    pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
    print("LoRAs loaded successfully.")
except Exception as e:
    print(f"LoRA Load Failed: {e}")
    print("Continuing without LoRA (Standard Speed)...")
print("--- Preparing Image ---")
if INPUT_IMAGE_PATH.startswith("http"):
    input_image = load_image(INPUT_IMAGE_PATH)
else:
    input_image = Image.open(INPUT_IMAGE_PATH).convert("RGB")  # ensure 3-channel RGB for local files
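# Wan expects spatial dimensions that are multiples of 16; scale the longest side
# to at most 832 px and round both sides down accordingly.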
def resize_for_wan(image, max_dim=832):
    w, h = image.size
    scale = max_dim / max(w, h)
    new_w = int(w * scale)
    new_h = int(h * scale)
    new_w = (new_w // 16) * 16
    new_h = (new_h // 16) * 16
    return image.resize((new_w, new_h), Image.LANCZOS)
input_image = resize_for_wan(input_image)
print("--- Generating Video ---")
gc.collect()
torch.cuda.empty_cache()
seed = 42
generator = torch.Generator(device="cuda").manual_seed(seed)
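# 81 frames at 16 fps is roughly 5 seconds of video.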
output_frames = pipe(
    image=input_image,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    height=input_image.height,
    width=input_image.width,
    num_frames=81,
    num_inference_steps=8,  # Needs this step count, otherwise the video crossfades in the middle.
    guidance_scale=1.0,
    guidance_scale_2=1.0,
    generator=generator
).frames[0]
export_to_video(output_frames, OUTPUT_PATH, fps=16)
print(f"DONE! Video saved to: {OUTPUT_PATH}")