Instructions to use circulus/FLUX.2-klein-9B-bnb-4bit with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use circulus/FLUX.2-klein-9B-bnb-4bit with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("circulus/FLUX.2-klein-9B-bnb-4bit", dtype=torch.bfloat16, device_map="cuda")

prompt = "Turn this cat into a dog"
input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png")
image = pipe(image=input_image, prompt=prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
# Build a bitsandbytes 4-bit (NF4) variant of the Lightricks/LTX-2 pipeline,
# render one sample clip with it, and save the resulting pipeline to disk.
#
# Steps, in order:
#   1. load the text encoder and the video transformer with 4-bit quantization,
#   2. assemble an LTX2Pipeline around them and move it to CUDA in bfloat16,
#   3. generate video + audio for a fixed prompt and write "video2.mp4",
#   4. save the pipeline to "./LTX-2-bnb-4bit".
import math
import os

import numpy as np
import torch
from diffusers import LTX2ImageToVideoPipeline, LTX2Pipeline, LTX2VideoTransformer3DModel
from diffusers.pipelines.ltx2.export_utils import encode_video
from diffusers.utils import load_image
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    Gemma3ForConditionalGeneration,
    Qwen3ForCausalLM,
)

# Enable TF32 for CUDA matmul and cuDNN, and turn on cuDNN benchmark mode.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.benchmark = True

# Shared quantization settings: 4-bit NF4 weights, bfloat16 compute,
# double quantization enabled.
BNB_CONFIG = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

repo = "Lightricks/LTX-2"

# NOTE(review): neither quantized load passes a torch_dtype, so the dtype of
# any non-quantized parameters is whatever the libraries default to — confirm
# that this is what should end up in the saved checkpoint.
text_encoder = Gemma3ForConditionalGeneration.from_pretrained(
    repo,
    subfolder="text_encoder",
    quantization_config=BNB_CONFIG,
)

### transformer
transformer_4bit = LTX2VideoTransformer3DModel.from_pretrained(
    repo,
    subfolder="transformer",
    quantization_config=BNB_CONFIG,
)

# The remaining components are loaded from the repo in bfloat16; the two
# quantized modules above are injected in place of the repo's own copies.
pipe = LTX2Pipeline.from_pretrained(
    repo,
    torch_dtype=torch.bfloat16,
    transformer=transformer_4bit,
    text_encoder=text_encoder,
)

# Explicitly cast the auxiliary components to bfloat16 before moving the
# whole pipeline to the GPU.
for component in (pipe.vae, pipe.connectors, pipe.audio_vae, pipe.vocoder):
    component.to(dtype=torch.bfloat16)
pipe.to("cuda", dtype=torch.bfloat16)

# Optional conditioning image. It is not passed to the pipeline call below,
# so a missing file must not abort the run: load it only when it exists.
IMAGE_PATH = "./suji.jpg"
image = load_image(IMAGE_PATH) if os.path.isfile(IMAGE_PATH) else None

prompt = "A very beautiful korean kpop young woman is walking waikiki beach"
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
frame_rate = 24.0

# Only the pipeline call runs under autocast; the NumPy post-processing and
# the encode/save steps that follow are unaffected by it.
with torch.autocast("cuda", dtype=torch.bfloat16):
    video, audio = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=768,
        height=512,
        num_frames=121,
        frame_rate=frame_rate,
        num_inference_steps=40,
        guidance_scale=4.0,
        output_type="np",
        return_dict=False,
    )

# Sanitize the float frames (NaN -> 0, clamp to [0, 1]) and convert them to
# 8-bit values before handing them to the encoder as a tensor.
video = np.nan_to_num(video, nan=0.0)
video = np.clip(video, 0, 1)
video = (video * 255).round().astype("uint8")
video = torch.from_numpy(video)

# Write the first generated clip together with its audio track.
encode_video(
    video[0],
    fps=frame_rate,
    audio=audio[0].float().cpu(),
    audio_sample_rate=pipe.vocoder.config.output_sampling_rate,  # should be 24000
    output_path="video2.mp4",
)

# Persist the pipeline (including the quantized modules) for later reuse.
pipe.save_pretrained("./LTX-2-bnb-4bit")