Wan 2.2 t2v under 18 GB VRAM and 32 GB RAM
import gc

import torch
from diffusers import (
    BitsAndBytesConfig,  # diffusers' own config class, since we quantize diffusers models
    FlowMatchEulerDiscreteScheduler,
    WanPipeline,
    WanTransformer3DModel,
)
from diffusers.utils import export_to_video
# ==========================================
# 1. USER SETTINGS
# ==========================================
PROMPT = "A cinematic close-up of a cat wearing a black fedora hat, looking around, subtle movement, high quality, 4k, indoors"
NEGATIVE_PROMPT = "low quality, bad hands, distorted, blur, motion artifacts, color banding"
OUTPUT_PATH = "C:/Tmp/wan_t2v_turbo_result.mp4"

# Resolution: 832x480 is the standard preset.
# You can try 1280x720, but it might OOM on 24 GB even with 4-bit weights.
WIDTH = 832
HEIGHT = 480
NUM_FRAMES = 81
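# Note (addition, not in the original gist): Wan's temporal VAE compresses
# frames 4x, so num_frames should have the form 4k + 1 (49, 81, 121, ...).
# 81 frames at the 16 fps used when exporting below is roughly 5 seconds.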
# ==========================================
# 2. SETUP & CLEANUP
# ==========================================
print("--- Initializing ---")
gc.collect()
torch.cuda.empty_cache()
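# Optional sanity check (addition, not in the original gist): report the GPU
# and its total VRAM so OOM failures are easier to diagnose. Assumes a single
# CUDA device at index 0.
if torch.cuda.is_available():
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU: {torch.cuda.get_device_name(0)} ({total_gb:.1f} GB VRAM)")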
MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"

# 4-bit configuration (mandatory for 24 GB VRAM)
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
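# Rough arithmetic behind the 4-bit choice (estimate, not measured here):
# each 14B transformer is ~28 GB in bf16 but ~7 GB in NF4, so both experts
# plus the text encoder and VAE fit the stated budget once CPU offload keeps
# only the active component on the GPU.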
# ==========================================
# 3. LOAD MODELS (4-BIT QUANTIZED)
# ==========================================
print("--- Loading Models ---")

# Load transformer 1 (high-noise expert)
transformer_high = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)

# Load transformer 2 (low-noise expert)
transformer_low = WanTransformer3DModel.from_pretrained(
    MODEL_ID,
    subfolder="transformer_2",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)

# Create the pipeline (WanPipeline is text-to-video)
pipe = WanPipeline.from_pretrained(
    MODEL_ID,
    transformer=transformer_high,
    transformer_2=transformer_low,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
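# Optional check (addition; assumes your diffusers version exposes
# ModelMixin.get_memory_footprint): confirm NF4 actually loaded. ~7 GB per
# expert is the expected ballpark, vs ~28 GB unquantized.
print(f"High-noise transformer: {transformer_high.get_memory_footprint() / 1024**3:.2f} GB")
print(f"Low-noise transformer:  {transformer_low.get_memory_footprint() / 1024**3:.2f} GB")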
# ==========================================
# 4. MEMORY & SCHEDULER SETUP
# ==========================================
print("--- Configuring Optimization ---")

# 1. CPU offload (saves VRAM)
pipe.enable_model_cpu_offload()

# 2. Fix "ghosting/overlay" artifacts:
#    enable slicing (frame-by-frame decode) and DISABLE tiling.
pipe.vae.enable_slicing()
pipe.vae.disable_tiling()

# 3. Set scheduler to Euler + shift 5.0 (required for the Lightx2v LoRA)
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    shift=5.0,
    use_dynamic_shifting=False,
)
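# Fallback note (addition, not in the original gist): if VAE decode still
# OOMs at higher resolutions, tiling can be re-enabled at the cost of the
# possible tile seams this script disables it to avoid:
#   pipe.vae.enable_tiling()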
# ==========================================
# 5. LOAD T2V TURBO LORA
# ==========================================
print("--- Loading T2V Turbo LoRA ---")
try:
    # Distilled Lightx2v T2V LoRA from Kijai's repack repo
    LORA_FILENAME = "Lightx2v/lightx2v_T2V_14B_cfg_step_distill_v2_lora_rank128_bf16.safetensors"

    # Load into the high-noise transformer
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name=LORA_FILENAME,
        adapter_name="lightx2v",
    )
    # Load into the low-noise transformer
    pipe.load_lora_weights(
        "Kijai/WanVideo_comfy",
        weight_name=LORA_FILENAME,
        adapter_name="lightx2v_2",
        load_into_transformer_2=True,
    )
    pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1.0, 1.0])
    print("T2V LoRA loaded successfully.")
except Exception as e:
    print(f"LoRA load failed: {e}")
    print("STOPPING: this script is tuned for the LoRA; running without it at 8 steps will produce bad results.")
    raise  # actually stop, rather than silently generating a bad video
# ==========================================
# 6. GENERATE
# ==========================================
print("--- Generating Video ---")
gc.collect()
torch.cuda.empty_cache()

seed = 42
generator = torch.Generator(device="cuda").manual_seed(seed)

output_frames = pipe(
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    height=HEIGHT,
    width=WIDTH,
    num_frames=NUM_FRAMES,
    # 8 steps is the "safe" spot for this LoRA to avoid artifacts
    num_inference_steps=8,
    # Distilled models usually need guidance = 1.0, or very low (e.g. 1.5)
    guidance_scale=1.0,
    generator=generator,
).frames[0]
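# Optional (addition, not in the original gist): report peak VRAM so the
# "under 18 GB" claim can be checked on your own hardware.
if torch.cuda.is_available():
    print(f"Peak VRAM allocated: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")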
# ==========================================
# 7. SAVE RESULT
# ==========================================
export_to_video(output_frames, OUTPUT_PATH, fps=16)
print(f"DONE! Video saved to: {OUTPUT_PATH}")