Description
!python /content/mamba/benchmarks/benchmark_generation_mamba_simple.py --model-name "state-spaces/mamba-2.8b" --prompt "My cat wrote all this CUDA code for a new language model and" --minp 0.05 --topk 0 --temperature 0.7 --repetition-penalty 1.2
/usr/local/lib/python3.10/dist-packages/mamba_ssm/ops/selective_scan_interface.py:164: FutureWarning: torch.cuda.amp.custom_fwd(args...)
is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda')
instead.
def forward(ctx, xz, conv1d_weight, conv1d_bias, x_proj_weight, delta_proj_weight,
/usr/local/lib/python3.10/dist-packages/mamba_ssm/ops/selective_scan_interface.py:240: FutureWarning: torch.cuda.amp.custom_bwd(args...)
is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda')
instead.
def backward(ctx, dout):
/usr/local/lib/python3.10/dist-packages/mamba_ssm/ops/triton/layer_norm.py:986: FutureWarning: torch.cuda.amp.custom_fwd(args...)
is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda')
instead.
def forward(
/usr/local/lib/python3.10/dist-packages/mamba_ssm/ops/triton/layer_norm.py:1045: FutureWarning: torch.cuda.amp.custom_bwd(args...)
is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda')
instead.
def backward(ctx, dout, *args):
/usr/local/lib/python3.10/dist-packages/mamba_ssm/distributed/tensor_parallel.py:26: FutureWarning: torch.cuda.amp.custom_fwd(args...)
is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda')
instead.
def forward(ctx, x, weight, bias, process_group=None, sequence_parallel=True):
/usr/local/lib/python3.10/dist-packages/mamba_ssm/distributed/tensor_parallel.py:62: FutureWarning: torch.cuda.amp.custom_bwd(args...)
is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda')
instead.
def backward(ctx, grad_output):
/usr/local/lib/python3.10/dist-packages/mamba_ssm/ops/triton/ssd_combined.py:758: FutureWarning: torch.cuda.amp.custom_fwd(args...)
is deprecated. Please use torch.amp.custom_fwd(args..., device_type='cuda')
instead.
def forward(ctx, zxbcdt, conv1d_weight, conv1d_bias, dt_bias, A, D, chunk_size, initial_states=None, seq_idx=None, dt_limit=(0.0, float("inf")), return_final_states=False, activation="silu",
/usr/local/lib/python3.10/dist-packages/mamba_ssm/ops/triton/ssd_combined.py:836: FutureWarning: torch.cuda.amp.custom_bwd(args...)
is deprecated. Please use torch.amp.custom_bwd(args..., device_type='cuda')
instead.
def backward(ctx, dout, *args):
Loading model state-spaces/mamba-790m-hf
Traceback (most recent call last):
File "/content/mamba/benchmarks/benchmark_generation_mamba_simple.py", line 38, in <module>
model = MambaLMHeadModel.from_pretrained(args.model_name, device=device, dtype=dtype)
File "/usr/local/lib/python3.10/dist-packages/mamba_ssm/models/mixer_seq_simple.py", line 289, in from_pretrained
config = MambaConfig(**config_data)
TypeError: MambaConfig.__init__() got an unexpected keyword argument 'architectures'