Sayak Paul (sayakpaul)
Learn, unlearn and relearn.
sayakpaul / flux_nvfp4_benchmark.py
Last active February 3, 2026 03:58
Benchmarking script for NVFP4 with TorchAO. Pair coded with Claude Code.
"""
NVFP4 Quantization Benchmark for Flux.1-Dev on Modal B200 GPU
This script benchmarks NVFP4 (4-bit floating point) quantization performance
for the Flux.1-Dev image generation model on NVIDIA B200 GPUs.
B200 GPUs have native FP4 Tensor Core support, making them ideal for
this workload.
Usage:
"""
import torch
from diffusers import DiffusionPipeline
import spaces
from spaces.zero.torch.aoti import ZeroGPUCompiledModel, ZeroGPUWeights
from time import perf_counter
import argparse
CKPT_ID = "black-forest-labs/FLUX.1-dev"
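The preview stops at the checkpoint id. As a rough illustration of the workflow the docstring describes, the sketch below quantizes the Flux transformer to NVFP4 with TorchAO and times one generation. The `NVFP4InferenceConfig` import path, the prompt, and the step count are assumptions, not taken from the gist, and the Modal/ZeroGPU plumbing is omitted.

```python
import torch
from time import perf_counter
from diffusers import DiffusionPipeline
from torchao.quantization import quantize_
# NVFP4 config lives under TorchAO's prototype MX/FP4 namespace in recent releases;
# the exact import path is an assumption here.
from torchao.prototype.mx_formats import NVFP4InferenceConfig

CKPT_ID = "black-forest-labs/FLUX.1-dev"
pipe = DiffusionPipeline.from_pretrained(CKPT_ID, torch_dtype=torch.bfloat16).to("cuda")

# Quantize only the transformer's linear layers to NVFP4; B200 Tensor Cores run FP4 matmuls natively.
quantize_(pipe.transformer, NVFP4InferenceConfig())

prompt = "A photo of an astronaut riding a horse"  # placeholder prompt
pipe(prompt, num_inference_steps=28)  # warmup / compile pass
torch.cuda.synchronize()

start = perf_counter()
image = pipe(prompt, num_inference_steps=28).images[0]
torch.cuda.synchronize()
print(f"latency: {perf_counter() - start:.2f}s")
image.save("flux_nvfp4.png")
```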
sayakpaul / README.md
Last active September 5, 2025 02:22
Regional compilation in AoT
# Make sure you are using the latest `bitsandbytes` (at least 0.46.0) and PyTorch nightlies (at least 2.8).
# Put together by sayakpaul and anijain2305
from diffusers.quantizers import PipelineQuantizationConfig
from diffusers import DiffusionPipeline, FluxPipeline
from functools import partial
import torch.utils.benchmark as benchmark
import argparse
import json
import os
import psutil
import time
import torch
def benchmark_fn(f, *args, **kwargs):
    t0 = benchmark.Timer(
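The `benchmark_fn` helper is cut off above. Below is a minimal sketch of the usual `torch.utils.benchmark.Timer` pattern it appears to follow, together with an illustration of the "regional" compilation idea on a Flux pipeline. Note that the gist targets ahead-of-time (AoT) compilation, whereas this sketch uses plain `torch.compile` per repeated block as a stand-in; the `transformer_blocks`/`single_transformer_blocks` attribute names are assumed from `FluxTransformer2DModel`.

```python
import torch
import torch.utils.benchmark as benchmark
from diffusers import FluxPipeline

def benchmark_fn(f, *args, **kwargs):
    # Standard Timer-based wall-clock measurement for a callable.
    t0 = benchmark.Timer(
        stmt="f(*args, **kwargs)",
        globals={"f": f, "args": args, "kwargs": kwargs},
        num_threads=torch.get_num_threads(),
    )
    return t0.blocked_autorange().mean

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# Regional compilation: compile each repeated transformer block separately instead of
# the whole model, which keeps compile time low while covering most of the compute.
for block in pipe.transformer.transformer_blocks:
    block.forward = torch.compile(block.forward, fullgraph=True)
for block in pipe.transformer.single_transformer_blocks:
    block.forward = torch.compile(block.forward, fullgraph=True)

latency = benchmark_fn(lambda: pipe("a cup of coffee", num_inference_steps=28))
print(f"{latency:.3f} s / call")
```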
sayakpaul / grade_images_with_gemini.py
Last active October 8, 2025 21:21
Shows how to use Gemini 2.0 Flash to grade images on multiple aspects like accuracy to the prompt, emotional and thematic response, etc.
from google import genai
from google.genai import types
import typing_extensions as typing
from PIL import Image
import requests
import io
import json
import os
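Only the imports are visible in this preview. A minimal sketch of the grading call with the google-genai SDK follows; the grading aspects in the schema and the prompt wording are assumptions, and only the general pattern (structured JSON output from gemini-2.0-flash over an image) reflects the description above.

```python
import os
import json
from PIL import Image
from google import genai
from google.genai import types
import typing_extensions as typing

# The exact aspects graded in the gist are not visible here; these fields are assumptions.
class ImageGrade(typing.TypedDict):
    accuracy_to_prompt: int
    emotional_and_thematic_response: int
    overall: int

client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
image = Image.open("generated.png")

response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=[
        image,
        "Grade this image from 1-10 on each aspect for the prompt: 'a cozy cabin in the snow'.",
    ],
    config=types.GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=ImageGrade,
    ),
)
print(json.loads(response.text))
```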
sayakpaul / generate_labels_with_deepseek.py
Last active November 4, 2025 23:21
Generate labels with DeepSeek and `transformers`.
"""
Implementation of the label generation part in https://danielvanstrien.xyz/posts/2025/deepseek/distil-deepseek-modernbert.html
using `transformers` and DeepSeek.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
import contextlib
import math
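Again only the imports survive in the preview. The sketch below shows one way the labeling step could look with a distilled DeepSeek-R1 checkpoint in `transformers`; the model id, prompt, and label wording are assumptions based on the linked blog post, not copied from the gist.

```python
import re
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model id is an assumption; the linked post distils labels from a DeepSeek-R1 model.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt = "Classify the following dataset card as USEFUL or NOT USEFUL: ..."  # placeholder
messages = [{"role": "user", "content": prompt}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(inputs, max_new_tokens=512, do_sample=False)
text = tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True)

# R1-style models emit a <think>...</think> reasoning block before the final label.
label = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
print(label)
```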
sayakpaul / create_collage_videos.py
Created January 30, 2025 10:42
Create a nice collage video from multiple input videos.
from moviepy.editor import VideoFileClip, clips_array
import glob
def create_video_collage(video_paths, output_path="collage.mp4"):
    """
    Combine four videos of the same resolution into a 2×2 collage.

    Args:
        video_paths (list[str]): List of paths to the four video files.
        output_path (str): Filename for the output collage video.
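The function body is cut off after the docstring. A plausible completion with moviepy's `clips_array` is sketched below; the 2×2 grid layout matches the docstring, while the cleanup and the `__main__` driver are assumptions.

```python
from moviepy.editor import VideoFileClip, clips_array
import glob

def create_video_collage(video_paths, output_path="collage.mp4"):
    # Load the four source clips (assumed to share one resolution).
    clips = [VideoFileClip(path) for path in video_paths]
    # Arrange them in a 2x2 grid and render the collage.
    collage = clips_array([[clips[0], clips[1]], [clips[2], clips[3]]])
    collage.write_videofile(output_path)
    for clip in clips:
        clip.close()

if __name__ == "__main__":
    create_video_collage(sorted(glob.glob("videos/*.mp4"))[:4])
```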
sayakpaul / benchmark_flux_without_compile.py
Created January 24, 2025 10:15
Benchmarking Flux across different optimizations.
from diffusers import DiffusionPipeline
from diffusers import FluxTransformer2DModel, BitsAndBytesConfig
from transformers import T5EncoderModel, BitsAndBytesConfig as BnbConfig
from offloader import ModelOffloaderV2
import torch.utils.benchmark as benchmark
from pathlib import Path
import os
import sys
import torch
import json
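The preview again stops at the imports (the `offloader.ModelOffloaderV2` module is a local helper that is not shown). The sketch below reconstructs one of the optimization variants the script presumably benchmarks, NF4 quantization of the Flux transformer and the T5 text encoder via bitsandbytes; the checkpoint id, prompt, and settings are placeholders.

```python
import torch
from diffusers import DiffusionPipeline, FluxTransformer2DModel, BitsAndBytesConfig
from transformers import T5EncoderModel, BitsAndBytesConfig as BnbConfig

ckpt_id = "black-forest-labs/FLUX.1-dev"

# 4-bit NF4 quantization for the transformer (diffusers) and the T5 encoder (transformers).
transformer = FluxTransformer2DModel.from_pretrained(
    ckpt_id,
    subfolder="transformer",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
text_encoder_2 = T5EncoderModel.from_pretrained(
    ckpt_id,
    subfolder="text_encoder_2",
    quantization_config=BnbConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16),
    torch_dtype=torch.bfloat16,
)
pipe = DiffusionPipeline.from_pretrained(
    ckpt_id, transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()

image = pipe("a photo of a dog in the park", num_inference_steps=28).images[0]
image.save("flux_nf4.png")
```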
import torch
from diffusers.utils import export_to_video
from diffusers import LTXPipeline, LTXVideoTransformer3DModel, GGUFQuantizationConfig
ckpt_path = (
    "https://huggingface.co/city96/LTX-Video-gguf/blob/main/ltx-video-2b-v0.9-Q3_K_S.gguf"
)
transformer = LTXVideoTransformer3DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
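)

# The preview ends mid-call above. Everything from here on is a hedged sketch of how the
# script plausibly continues: build the LTX-Video pipeline around the GGUF-quantized
# transformer and export a short clip. The base repo id, prompt, and generation settings
# are assumptions, not taken from the original snippet.
pipe = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video",
    transformer=transformer,
    torch_dtype=torch.bfloat16,
)
pipe.enable_model_cpu_offload()
video = pipe(
    prompt="A woman walks along a foggy pier at sunrise",
    num_frames=65,
    num_inference_steps=40,
).frames[0]
export_to_video(video, "ltx_gguf_q3_k_s.mp4", fps=24)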