>>> from transformers import MambaConfig, MambaForCausalLM, AutoTokenizer
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b", padding_side = "left")
>>> tokenizer.pad_token = tokenizer.eos_token

>>> model = MambaForCausalLM.from_pretrained("state-spaces/mamba-130m", vocab_size=50280, num_hidden_layers=24, torch_dtype=torch.float32)
>>> model.config.use_cache = True
>>> input_ids = tokenizer(["Hey how are you doing?", "Explain how soy sauce is made"], padding=True, return_tensors= "pt")["input_ids"]

>>> out = model.generate(input_ids, max_new_tokens=10)
>>> print(tokenizer.batch_decode(out))
["<|endoftext|>Hey how are you doing?\n\nI'm a newbie to the game", 'Explain how soy sauce is made.\n\n1. Add the soy sauce to']

Downloads last month: 969

Safetensors

Model size: 0.1B params

Tensor type: F32

Collection including ArthurZ/mamba-130m

Mamba

Collection

Mamba checkpoints compatible with transformers • 6 items • Updated Feb 19, 2024 • 2