facebookresearch · fmassa · May 10, 2022 · May 10, 2022 · blefaudeux · May 10, 2022
diff --git a/tests/test_custom_ops.py b/tests/test_custom_ops.py
@@ -158,14 +158,14 @@ def test_sddmm_sputnik(device):
 
 
 @cuda_only
+@pytest.mark.parametrize("prob", [0.5, 1])
 @pytest.mark.parametrize("K", [32, 17])
 @pytest.mark.parametrize("M", [30, 17])
 @pytest.mark.parametrize("L", [30, 17])
-def test_sddmm_csr(L, M, K):
+def test_sddmm_csr(L, M, K, prob):
     device = torch.device("cuda")
     # TODO add more checks for different nnz
     B = 8
-    prob = 0.5
     a = torch.rand(B, L, K, device=device)
     b = torch.rand(B, M, K, device=device)
     mask = _create_random_sparsity(
@@ -188,7 +188,7 @@ def test_sddmm_csr(L, M, K):
 
 
 @cuda_only
-@pytest.mark.parametrize("nnz", [4, 16, 20, 36])
+@pytest.mark.parametrize("nnz", [0, 4, 16, 20, 36])
 def test_sddmm_csr_per_nnz(nnz):
     device = torch.device("cuda")
     B = 8
@@ -215,14 +215,14 @@ def test_sddmm_csr_per_nnz(nnz):
 
 
 @cuda_only
+@pytest.mark.parametrize("prob", [0.5, 1])
 @pytest.mark.parametrize("K", [32, 17])
 @pytest.mark.parametrize("M", [30, 17])
 @pytest.mark.parametrize("L", [30, 17])
-def test_sddmm_coo(L, M, K):
+def test_sddmm_coo(L, M, K, prob):
     device = torch.device("cuda")
     # TODO add more checks for different nnz
     B = 8
-    prob = 0.5
     a = torch.rand(B, L, K, device=device)
     b = torch.rand(B, M, K, device=device)
     mask = _create_random_sparsity(

diff --git a/xformers/components/attention/csrc/cuda/sddmm2_cuda.cu b/xformers/components/attention/csrc/cuda/sddmm2_cuda.cu
@@ -457,6 +457,9 @@ torch::Tensor sddmm_cuda_coo(
   const auto nnz = rowind.size(0);
   auto out = torch::empty({batch_size, nnz}, D1.options());
 
+  if (out.numel() == 0)
+    return out;
+
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   dim3 grid_dim(nnz / 16 + (nnz & 15), batch_size, 1);
   if ((k % 4) == 0) {
@@ -496,6 +499,7 @@ torch::Tensor sddmm_cuda_coo(
         D2.data_ptr<float>(),
         out.data_ptr<float>());
   }
+  AT_CUDA_CHECK(cudaGetLastError());
   return out;
 }
 
@@ -511,6 +515,9 @@ torch::Tensor sddmm_cuda_csr(
   const auto nnz = colind.size(0);
   auto out = torch::empty({batch_size, nnz}, D1.options());
 
+  if (out.numel() == 0)
+    return out;
+
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   dim3 grid_dim(nnz / 16 + (nnz & 15), batch_size, 1);
 
@@ -539,6 +546,7 @@ torch::Tensor sddmm_cuda_csr(
         D2.data_ptr<float>(),
         out.data_ptr<float>());
   }
+  AT_CUDA_CHECK(cudaGetLastError());
   return out;
 }
 } // namespace ge_spmm