Tests: improve CUDA support detection (#985)
* implicitly skip any test that implicitly uses CUDA on a non-CUDA box
* add a `requires_cuda` fixture
akx committed Jan 24, 2024
1 parent 53f8af8 commit f1c7574
Showing 8 changed files with 26 additions and 14 deletions.
19 changes: 19 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,19 @@
+import pytest
+import torch
+
+
+def pytest_runtest_call(item):
+    try:
+        item.runtest()
+    except AssertionError as ae:
+        if str(ae) == "Torch not compiled with CUDA enabled":
+            pytest.skip("Torch not compiled with CUDA enabled")
+        raise
+
+
+@pytest.fixture(scope="session")
+def requires_cuda() -> bool:
+    cuda_available = torch.cuda.is_available()
+    if not cuda_available:
+        pytest.skip("CUDA is required")
+    return cuda_available
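
With this conftest.py in place, a test can opt into an explicit CUDA requirement by naming the session-scoped fixture as a parameter, while tests that never request it are still skipped implicitly when torch raises "Torch not compiled with CUDA enabled" at runtime. A minimal sketch of the opt-in pattern (the test name and body below are illustrative, not part of this commit):

import torch

def test_needs_gpu(requires_cuda):
    # pytest resolves requires_cuda from tests/conftest.py; on a non-CUDA box
    # the fixture calls pytest.skip(...) before the test body runs.
    x = torch.ones(2, 2, device="cuda")
    assert x.sum().item() == 4.0
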
4 changes: 0 additions & 4 deletions tests/test_autograd.py
@@ -40,7 +40,6 @@
     ids=names,
 )
 def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
-    if not torch.cuda.is_available(): pytest.skip('No GPU found.')
     if dim2 > 0:
         dim2 = dim2 - (dim2 % 16)
         dim3 = dim3 - (dim3 % 16)
@@ -307,7 +306,6 @@ def test_matmullt(
     has_fp16_weights,
     has_bias
 ):
-    if not torch.cuda.is_available(): pytest.skip('No GPU found.')
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
     dimB = (dim3, dim4) if not transpose[1] else (dim4, dim3)
     outlier_dim = torch.randint(0, dimA[1], size=(dimA[1] // 8,), device="cuda")
@@ -461,7 +459,6 @@ def test_matmullt(
 values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type))
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose, has_bias, compress_statistics, quant_type))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_has_bias_{}_compress_statistics_{}_quant_type_{}".format(*vals) for vals in str_values]
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type", values, ids=names)
 def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
@@ -551,7 +548,6 @@ def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose,
 values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose))
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
 def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
4 changes: 2 additions & 2 deletions tests/test_cuda_setup_evaluator.py
@@ -5,12 +5,12 @@

 # hardcoded test. Not good, but a sanity check for now
 # TODO: improve this
-def test_manual_override():
+def test_manual_override(requires_cuda):
     manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))

     pytorch_version = torch.version.cuda.replace('.', '')

-    assert pytorch_version != 122
+    assert pytorch_version != 122 # TODO: this will never be true...

     os.environ['CUDA_HOME']='{manual_cuda_path}'
     os.environ['BNB_CUDA_VERSION']='122'
7 changes: 4 additions & 3 deletions tests/test_functional.py
@@ -617,7 +617,10 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
         return
     if dtype == torch.int32 and out_order != "col32":
         return
-    func = F.get_transform_func(dtype, orderA, orderOut, transpose)
+    try:
+        func = F.get_transform_func(dtype, orderA, orderOut, transpose)
+    except ValueError as ve:
+        pytest.skip(str(ve)) # skip if not supported

     if dims == 2:
         A = torch.randint(-128, 127, size=(dim1, dim2), device="cuda").to(dtype)
@@ -2278,7 +2281,6 @@ def test_fp4_quant(dtype):
     assert relerr.item() < 0.28


-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 def test_4bit_compressed_stats(quant_type):
     for blocksize in [128, 64]:
@@ -2317,7 +2319,6 @@ def test_4bit_compressed_stats(quant_type):



-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 #@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 @pytest.mark.parametrize("quant_type", ['nf4'])
 def test_bench_4bit_dequant(quant_type):
2 changes: 1 addition & 1 deletion tests/test_generation.py
@@ -79,7 +79,7 @@ def model_and_tokenizer(request):
 @pytest.mark.parametrize("DQ", [True, False], ids=['DQ_True', 'DQ_False'])
 @pytest.mark.parametrize("inference_kernel", [True, False], ids=['inference_kernel_True', 'inference_kernel_False'])
 #@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=['fp16', 'bf16', 'fp32'])
-def test_pi(model_and_tokenizer, inference_kernel, DQ):
+def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ):
     print('')
     dtype = torch.float16

1 change: 0 additions & 1 deletion tests/test_linear4bit.py
@@ -15,7 +15,6 @@
     'float32': torch.float32
 }

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize(
     "quant_type, compress_statistics, bias, quant_storage",
     list(product(["nf4", "fp4"], [False, True], [False, True], ['uint8', 'float16', 'bfloat16', 'float32'])),
2 changes: 0 additions & 2 deletions tests/test_linear8bitlt.py
@@ -33,7 +33,6 @@ def test_layout_exact_match():
     assert torch.all(torch.eq(restored_x, x))


-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 def test_linear_no_igemmlt():
     linear = torch.nn.Linear(1024, 3072)
     x = torch.randn(3, 1024, dtype=torch.half)
@@ -68,7 +67,6 @@ def test_linear_no_igemmlt():
     assert linear_custom.state.CxB is None


-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
                          list(product([False, True], [False, True], [False, True], [False, True])))
 def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):
1 change: 0 additions & 1 deletion tests/test_modules.py
@@ -520,7 +520,6 @@ def test_linear_kbit_fp32_bias(module):
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16))
 modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16))
 names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C', 'NF4+fp32', 'NF4+fp16', 'NF4+bf16']
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
 @pytest.mark.parametrize("module", modules, ids=names)
 def test_kbit_backprop(module):
     b = 17
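
With these changes, running the suite on a machine without CUDA should report the GPU-dependent tests as skipped rather than errored. To list each skipped test with its reason, something like the following illustrative invocation works, using pytest's standard -rs reporting flag:

pytest tests/ -rs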
