ROCm · amathews-amd · Nov 21, 2023 · Nov 15, 2023 · Nov 15, 2023 · Nov 20, 2023
diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
@@ -32,6 +32,7 @@
     if torch.version.cuda:
         lib.get_cusparse.restype = ct.c_void_p
     elif torch.version.hip:
+        HIP_ENVIRONMENT = True
         lib.get_hipsparse.restype = ct.c_void_p
 
     lib.cget_managed_ptr.restype = ct.c_void_p

diff --git a/tests/test_autograd.py b/tests/test_autograd.py
@@ -4,6 +4,7 @@
 import torch
 
 import bitsandbytes as bnb
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 n = 1
 k = 25
@@ -288,7 +289,7 @@ def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
 )
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_decomp_{}_has_fp16_weights_{}_has_bias_{}".format(*vals) for vals in str_values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize(
     "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, decomp, has_fp16_weights, has_bias",
     values,
@@ -552,6 +553,7 @@ def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose,
 str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
 names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
 def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
     dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)

diff --git a/tests/test_cuda_setup_evaluator.py b/tests/test_cuda_setup_evaluator.py
@@ -9,8 +9,9 @@
     evaluate_cuda_setup,
     extract_candidate_paths,
 )
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_cuda_full_system():
     ## this only tests the cuda version and not compute capability
 

diff --git a/tests/test_functional.py b/tests/test_functional.py
@@ -10,6 +10,7 @@
 
 import bitsandbytes as bnb
 from bitsandbytes import functional as F
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 from scipy.stats import norm
 
 torch.set_printoptions(
@@ -90,7 +91,7 @@ def setup():
 def teardown():
     pass
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize(
     "dtype", [torch.float32, torch.float16], ids=["float", "half"]
 )
@@ -110,7 +111,7 @@ def test_estimate_quantiles(dtype):
     diff = torch.abs(code - quantiles)
     assert (diff > 5e-02).sum().item() == 0
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_quantile_quantization():
     for i in range(100):
         A1 = torch.randn(1024, 1024, device="cuda")
@@ -153,7 +154,7 @@ def test_dynamic_quantization():
         assert diff < 0.004
 
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("nested", [False, True], ids=["False", "True"])
 @pytest.mark.parametrize("blocksize", [4096, 2048, 1024, 512, 256, 128, 64])
 def test_dynamic_blockwise_quantization(nested, blocksize):
@@ -601,6 +602,7 @@ def test_vector_quant(dim1, dim2, dim3):
 names = ["dim1_{}_dim2_{}_dim3_{}_dims_{}_dtype_{}_orderA_{}_orderOut_{}_transpose_{}".format(*vals)for vals in values]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose",values,ids=names)
 def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose):
     if dims == 3 and out_order != "col32":
@@ -684,7 +686,7 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dim3, dim4, dims, ldb", values, ids=names)
 def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb):
     for i in range(k):
@@ -732,7 +734,7 @@ def test_igemmlt_int(dim1, dim2, dim3, dim4, dims, ldb):
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dim3, dim4, dims", values, ids=names)
 def test_igemmlt_half(dim1, dim2, dim3, dim4, dims):
     formatB = F.get_special_format_str()
@@ -956,7 +958,7 @@ def test_bench_8bit_training(batch, seq, model, hidden):
 values = list(product(dim1, dim4, dims, formatB, has_bias))
 names = ["dim1_{}_dim4_{}_dims_{}_formatB_{}_has_bias_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim4, dims, formatB, has_bias", values, ids=names)
 def test_dequant_mm(dim1, dim4, dims, formatB, has_bias):
     inner = torch.randint(1, 128, size=(1,)).item()
@@ -1109,7 +1111,7 @@ def test_double_quant(dim1, dim2):
 values = list(zip(dim1, dim4, inner))
 names = ["dim1_{}_dim4_{}_inner_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim4, inner", values, ids=names)
 def test_integrated_igemmlt(dim1, dim4, inner):
     for i in range(k):
@@ -1298,7 +1300,7 @@ def test_row_scale_bench(dim1, dim4, inner):
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize(
     "dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose",
     values,
@@ -1347,7 +1349,7 @@ def test_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, transpose):
     for vals in values
 ]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_overflow():
     formatB = F.get_special_format_str()
     print(formatB)
@@ -1408,7 +1410,7 @@ def test_coo_double_quant(dim1, dim2):
 values = list(product(dim1, dim2, transposed_B))
 names = ["dim1_{}_dim2_{}_transposed_B_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, transposed_B", values, ids=names)
 def test_spmm_coo(dim1, dim2, transposed_B):
     threshold = 1.5
@@ -1440,6 +1442,7 @@ def test_spmm_coo(dim1, dim2, transposed_B):
         assert_all_approx_close(out1, out2, rtol=0.01, atol=3.0e-2, count=30)
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_spmm_bench():
     batch = 2
     model = 1024 * 1
@@ -1489,7 +1492,7 @@ def test_spmm_bench():
 values = list(product(dim1, dim2))
 names = ["dim1_{}_dim2_{}".format(*vals) for vals in values]
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2", values, ids=names)
 def test_integrated_sparse_decomp(dim1, dim2):
     threshold = 3.0
@@ -1672,6 +1675,7 @@ def test_coo2csc():
 names = ["dim1_{}_dim2_{}_dtype_{}".format(*vals) for vals in values]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, dtype", values, ids=names)
 def test_spmm_coo_dequant(dim1, dim2, dtype):
     threshold = 6.0
@@ -2178,6 +2182,7 @@ def test_few_bit_quant():
     #assert False
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_kbit_quantile_estimation():
     for i in range(100):
         data = torch.randn(1024, 1024, device='cuda')
@@ -2220,7 +2225,7 @@ def test_bench_dequantization():
     #print((time.time()-t0)/1e6)
 
 
-
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_fp4_quant():
     vals = list(product([0, 1], repeat=4))
 
@@ -2258,6 +2263,7 @@ def test_fp4_quant():
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
 def test_4bit_compressed_stats(quant_type):
     for blocksize in [128, 64]:

diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
@@ -10,7 +10,7 @@
 from bitsandbytes import functional as F
 from bitsandbytes.autograd import get_inverse_transform_indices, undo_layout
 from bitsandbytes.nn.modules import Linear8bitLt
-
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 # contributed by Alex Borzunov, see:
 # https://github.com/bigscience-workshop/petals/blob/main/tests/test_linear8bitlt.py
@@ -69,6 +69,7 @@ def test_linear_no_igemmlt():
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
                          list(product([False, True], [False, True], [False, True], [False, True])))
 def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):

diff --git a/tests/test_modules.py b/tests/test_modules.py
@@ -5,7 +5,7 @@
 from torch import nn
 
 import bitsandbytes as bnb
-
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 class MockArgs:
     def __init__(self, initial_data):
@@ -315,6 +315,7 @@ def forward(self, x):
 names = [f"threshold_{vals}" for vals in values]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("threshold", values, ids=names)
 def test_linear8bitlt_inference(threshold):
     l1 = bnb.nn.Linear8bitLt(32, 64, threshold=threshold).cuda().half()
@@ -329,6 +330,7 @@ def test_linear8bitlt_inference(threshold):
             assert l1.state.CxB is not None
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 def test_linear8bitlt_accumulated_gradient():
     l1 = torch.nn.Sequential(*[bnb.nn.Linear8bitLt(32, 32).cuda().half() for i in range(2)])
     l2 = torch.nn.Sequential(*[torch.nn.Linear(32, 32).cuda().half() for i in range(2)])
@@ -518,6 +520,7 @@ def test_linear_kbit_fp32_bias(module):
 modules.append(lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compress_statistics=True))
 names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C']
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("module", modules, ids=names)
 def test_kbit_backprop(module):
     b = 17

diff --git a/tests/test_optim.py b/tests/test_optim.py
@@ -13,6 +13,7 @@
 
 import bitsandbytes as bnb
 import bitsandbytes.functional as F
+from bitsandbytes.cextension import HIP_ENVIRONMENT
 
 # import apex
 
@@ -109,6 +110,7 @@ def rm_path(path):
 optimizer_names = ["adam", "momentum", "rmsprop", 'paged_adamw', 'paged_adam', 'lion', 'paged_lion']
 values = list(product(dim1, dim2, gtype, optimizer_names))
 names = ["dim1_{}_dim2_{}_gtype_{}_optim_{}".format(*vals) for vals in values]
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, gtype, optim_name", values, ids=names)
 def test_optimizer32bit(dim1, dim2, gtype, optim_name):
     if gtype == torch.bfloat16 and optim_name in ['momentum', 'rmsprop']: pytest.skip()
@@ -251,6 +253,7 @@ def test_global_config(dim1, dim2, gtype):
 ]
 
 
+@pytest.mark.skipif(HIP_ENVIRONMENT, reason="this test is not supported on ROCm yet")
 @pytest.mark.parametrize("dim1, dim2, gtype, optim_name", values, ids=names)
 def test_optimizer8bit(dim1, dim2, gtype, optim_name):
     if gtype == torch.bfloat16 and optim_name not in ['adam8bit_blockwise', 'lion8bit_blockwise']: pytest.skip()