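This is a set of animal-sound LoRAs for Qwen3-0.6B that can be used to test LoRA loading. The steps below start an inference server, load one adapter into it at runtime, and request a completion from it.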
Start Inference Server
# Must be exported before the server starts so adapters can be loaded at runtime
# (the client script POSTs to load_lora_adapter on this server).
export VLLM_ALLOW_RUNTIME_LORA_UPDATING=True

# Serve the base model with LoRA support enabled.
uv run vllm serve Qwen/Qwen3-0.6B \
  --max-model-len 2048 \
  --enable-lora \
  --max-lora-rank 8 \
  --gpu-memory-utilization 0.6
Get Response
import math
from openai import OpenAI
from huggingface_hub import snapshot_download
# The Hub repo id is reused below as the adapter name and as the `model` of the request.
lora_name = "Jackmin108/Qwen3-0.6B-Meow-LoRA"
# Download the adapter files and keep the path that is handed to the server.
lora_path = snapshot_download(repo_id=lora_name)

messages = [
    {"content": "Follow the instructions to make animal noises", "role": "system"},
    {"content": "Make your favorite animal noise.", "role": "user"},
]

client = OpenAI(
    api_key="sk-proj-1234567890",
    base_url="http://localhost:8000/v1",
)

# Register the adapter with the running server; the path is relative to base_url.
adapter_request = {"lora_name": lora_name, "lora_path": lora_path}
client.post("load_lora_adapter", body=adapter_request, cast_to=str)

# Address the adapter by passing its name as the model of the chat request.
resp = client.chat.completions.create(
    model=lora_name,
    messages=messages,
    max_tokens=20,
    logprobs=True,
)
first_choice = resp.choices[0]

print("=== Completion ===")
print(first_choice.message.content)

print("=== Probabilities ===")
# exp(logprob) turns each token's log-probability back into a probability.
token_probs = [
    (entry.token, f"{math.exp(entry.logprob):.2f}")
    for entry in first_choice.logprobs.content
]
print("\n".join(str(pair) for pair in token_probs))
=== Completion ===
<think>
</think>
Meow Meow Meow Meow Meow
=== Probabilities ===
('<think>', '0.99')
('\n\n', '1.00')
('</think>', '1.00')
('\n\n', '1.00')
('Me', '1.00')
('ow', '1.00')
(' Me', '1.00')
('ow', '1.00')
(' Me', '1.00')
('ow', '1.00')
(' Me', '1.00')
('ow', '1.00')
(' Me', '1.00')
('ow', '1.00')
('<|im_end|>', '1.00')