Fix silent nnz overflow for large sparse compressed tensors. (pytorch#102523)

pearu · pytorchmergebot · commit fcbdbd668288 · 2023-05-30T16:58:01.000Z
Fixes pytorch#102520 Pull Request resolved: pytorch#102523 Approved by: https://github.com/nikitaved, https://github.com/cpuhrsch
diff --git a/aten/src/ATen/SparseCsrTensorImpl.h b/aten/src/ATen/SparseCsrTensorImpl.h
@@ -54,7 +54,7 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl {
   const Tensor& values() const {
     return values_;
   }
-  int nnz() {
+  int64_t nnz() {
     return col_indices_.size(-1);
   }
 
diff --git a/aten/src/ATen/native/sparse/SparseCsrTensor.cpp b/aten/src/ATen/native/sparse/SparseCsrTensor.cpp
@@ -213,7 +213,7 @@ void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_ind
   DimVector compressed_indices_batchsize = DimVector(compressed_indices.sizes().slice(0, batch_ndim));
   DimVector plain_indices_batchsize = DimVector(plain_indices.sizes().slice(0, batch_ndim));
   DimVector values_batchsize = DimVector(values.sizes().slice(0, batch_ndim));
-  const int values_nnz = values.size(batch_ndim);
+  const int64_t values_nnz = values.size(batch_ndim);
   DimVector values_blocksize = DimVector(values.sizes().slice(batch_ndim + 1, block_ndim));
   DimVector values_densesize = DimVector(values.sizes().slice(batch_ndim + 1 + block_ndim, dense_ndim));
   TORCH_CHECK(
@@ -229,9 +229,9 @@ void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_ind
                   ") must be divisible with blocksize[", i, "] (=", blocksize[i],
                   ") as defined by values shape");
   }
-  const int nrows = size[batch_ndim] / blocksize[0];
-  const int ncols = size[batch_ndim + 1] / blocksize[1];
-  int compressed_dim_size, plain_dim_size;
+  const int64_t nrows = size[batch_ndim] / blocksize[0];
+  const int64_t ncols = size[batch_ndim + 1] / blocksize[1];
+  int64_t compressed_dim_size, plain_dim_size;
   std::tie(compressed_dim_size, plain_dim_size) = AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS(layout, "validate_sparse_compressed_tensor_args",
                                                                                             [&] { return std::make_tuple(nrows, ncols); },
                                                                                             [&] { return std::make_tuple(ncols, nrows); });
diff --git a/test/test_sparse_csr.py b/test/test_sparse_csr.py
@@ -12,7 +12,8 @@
      load_tests, coalescedonoff, parametrize, subtest, skipIfTorchDynamo, skipIfRocm, IS_FBCODE, IS_REMOTE_GPU)
 from torch.testing._internal.common_device_type import \
     (ops, instantiate_device_type_tests, dtypes, OpDTypes, dtypesIfCUDA, onlyCPU, onlyCUDA, skipCUDAIfNoSparseGeneric,
-     precisionOverride, skipMeta, skipCUDAIf, skipCUDAIfRocm, skipCPUIfNoMklSparse, skipCUDAIfRocmVersionLessThan)
+     precisionOverride, skipMeta, skipCUDAIf, skipCUDAIfRocm, skipCPUIfNoMklSparse, skipCUDAIfRocmVersionLessThan,
+     largeTensorTest)
 from torch.testing._internal.common_methods_invocations import \
     (op_db, sparse_csr_unary_ufuncs, ReductionOpInfo)
 from torch.testing._internal.common_cuda import _get_torch_cuda_version, TEST_CUDA
@@ -930,6 +931,18 @@ def test_csr_is_contiguous(self):
         with self.assertRaisesRegex(RuntimeError, "Sparse CSR tensors do not have is_contiguous"):
             a.is_contiguous()
 
+    @onlyCPU
+    @largeTensorTest("20GB", "cpu")
+    def test_csr_nnz(self):
+        # Tests the limits of the number of specified elements in CSR tensors, see gh-102520.
+        for nnz in [0, 2**31]:
+            rows, cols = 1, max(nnz, 1)
+            crow_indices = torch.tensor([0, nnz], dtype=torch.int64)
+            col_indices = torch.arange(nnz, dtype=torch.int64)
+            values = torch.ones(nnz, dtype=torch.int8)
+            a = torch.sparse_csr_tensor(crow_indices, col_indices, values, (rows, cols))
+            self.assertEqual(a._nnz(), nnz)
+
     def test_csr_double_to_sparse_csr(self):
         a = self.genSparseCSRTensor((3, 3), 3, dtype=torch.float, device=self.device_type, index_dtype=torch.int64)
         a.to_sparse_csr().to_sparse_csr()

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ struct TORCH_API SparseCsrTensorImpl : public TensorImpl {`
`54`	`54`	`const Tensor& values() const {`
`55`	`55`	`return values_;`
`56`	`56`	`}`
`57`		`- int nnz() {`
	`57`	`+ int64_t nnz() {`
`58`	`58`	`return col_indices_.size(-1);`
`59`	`59`	`}`
`60`	`60`