Fixed issue with bicubic interpolation on uint8 input and antialiasing (pytorch#102296)

vfdev-5 · pytorchmergebot · commit 7042e102155e · 2023-05-30T14:57:19.000Z
Description: - Fixed issue with bicubic interpolation on uint8 input and antialiasing, discovered by @NicolasHug - Unified `_separable_upsample_generic_Nd_kernel_impl_single_dim` on the `antialias` arg. Pull Request resolved: pytorch#102296 Approved by: https://github.com/NicolasHug
diff --git a/aten/src/ATen/native/cpu/UpSampleKernel.cpp b/aten/src/ATen/native/cpu/UpSampleKernel.cpp
@@ -1159,7 +1159,8 @@ struct HelperInterpLinear : public HelperInterpBase {
     int64_t ndims,
     int64_t reshape_dim,
     bool align_corners,
-    const c10::optional<double> opt_scale
+    const c10::optional<double> opt_scale,
+    bool antialias
   ) {
 
     std::vector<Tensor> indices_weights;
@@ -1172,6 +1173,7 @@ struct HelperInterpLinear : public HelperInterpBase {
         auto interp_size = HelperInterpLinear::interp_size;
         int unused;
         scalar_t unused_2;
+        auto align_corners_delta = (align_corners && !antialias) ? 0.5 : 0.0;
 
         std::tie(indices_weights, unused, unused_2) = HelperInterpLinear::_compute_indices_weights_aa<scalar_t>(
             input_size,
@@ -1182,8 +1184,8 @@ struct HelperInterpLinear : public HelperInterpBase {
             scale,
             interp_size,
             &HelperInterpLinear::aa_filter<scalar_t>,
-            /*antialias=*/true,
-            /*align_corners_delta=*/0.0);
+            /*antialias=*/antialias,
+            /*align_corners_delta=*/align_corners_delta);
       }
     );
     return indices_weights;
@@ -1293,7 +1295,8 @@ struct HelperInterpCubic : public HelperInterpBase {
     int64_t ndims,
     int64_t reshape_dim,
     bool align_corners,
-    const c10::optional<double> opt_scale
+    const c10::optional<double> opt_scale,
+    bool antialias
   ) {
 
     std::vector<Tensor> indices_weights;
@@ -1306,6 +1309,7 @@ struct HelperInterpCubic : public HelperInterpBase {
         auto interp_size = HelperInterpCubic::interp_size;
         int unused;
         scalar_t unused_2;
+        auto align_corners_delta = (align_corners && !antialias) ? 0.5 : 0.0;
 
         std::tie(indices_weights, unused, unused_2) = HelperInterpCubic::_compute_indices_weights_aa<scalar_t>(
             input_size,
@@ -1316,8 +1320,8 @@ struct HelperInterpCubic : public HelperInterpBase {
             scale,
             interp_size,
             &HelperInterpCubic::aa_filter<scalar_t>,
-            /*antialias=*/true,
-            /*align_corners_delta*/0.0);
+            /*antialias=*/antialias,
+            /*align_corners_delta*/align_corners_delta);
       }
     );
     return indices_weights;
@@ -1475,22 +1479,22 @@ void _separable_upsample_generic_Nd_kernel_impl_single_dim(
   unsigned int weights_precision = 0;
   int unused;
 
-  if (input_scalar_type == at::kByte) {
+  if (F::interp_size == 2 && input_scalar_type == at::kByte) {
+    // This is a special branch that provides uint8 dtype support for bilinear mode only
     std::tie(indices_weights, unused, weights_precision) =
-      // TODO: change that to F:: once / if bicubic mode supports uint8 after all
       HelperInterpLinear::compute_indices_int16_weights_aa(
         input.size(interp_dim), oshape[interp_dim],
         input.stride(interp_dim) * input.element_size(),
         input.dim(), interp_dim, align_corners, scales[interp_dim - 2],
         antialias);
     TORCH_INTERNAL_ASSERT(weights_precision > 0);
   } else {
-    TORCH_INTERNAL_ASSERT(antialias);
     indices_weights =
       F::compute_indices_weights_aa(
         input_scalar_type, input.size(interp_dim), oshape[interp_dim],
         input.stride(interp_dim) * input.element_size(),
-        input.dim(), interp_dim, align_corners, scales[interp_dim - 2]);
+        input.dim(), interp_dim, align_corners, scales[interp_dim - 2],
+        antialias);
   }
 
   TensorIteratorConfig config;
@@ -1801,6 +1805,11 @@ void upsample_bicubic2d_kernel_impl(
     bool align_corners,
     c10::optional<double> scales_h,
     c10::optional<double> scales_w) {
+
+  // Explicitly check for the unsupported uint8 dtype
+  TORCH_CHECK(input.scalar_type() != at::kByte,
+      "'upsample_bicubic2d_aa_kernel_impl' not implemented for 'Byte'");
+
   upsample_generic_Nd_kernel_impl<2, scale_t, HelperInterpCubic>(
     output, input, align_corners, {scales_h, scales_w});
 }
@@ -1812,6 +1821,10 @@ void upsample_bicubic2d_aa_kernel_impl(
     c10::optional<double> scales_h,
     c10::optional<double> scales_w) {
 
+  // Explicitly check for the unsupported uint8 dtype
+  TORCH_CHECK(input.scalar_type() != at::kByte,
+      "'upsample_bicubic2d_aa_kernel_impl' not implemented for 'Byte'");
+
   separable_upsample_generic_Nd_kernel_impl<2, scale_t, HelperInterpCubic>(
     output, input, align_corners, {scales_h, scales_w},
     /*antialias=*/true);
diff --git a/test/test_nn.py b/test/test_nn.py
@@ -33,7 +33,7 @@
 from torch.nn.utils.fusion import fuse_linear_bn_weights
 from torch.nn import Parameter
 from torch.nn.parallel._functions import Broadcast
-from torch.testing._internal.common_dtype import integral_types, get_all_math_dtypes
+from torch.testing._internal.common_dtype import integral_types, get_all_math_dtypes, floating_types
 from torch.testing._internal.common_utils import freeze_rng_state, run_tests, TestCase, skipIfNoLapack, skipIfRocm, \
     TEST_NUMPY, TEST_SCIPY, TEST_WITH_CROSSREF, TEST_WITH_ROCM, \
     download_file, get_function_arglist, load_tests, skipIfMps,\
@@ -9683,6 +9683,35 @@ def test_upsamplingBiMode2d(self, device, antialias, align_corners, mode, memory
 
                     self.assertEqual(a_cuda.grad, a_cpu.grad)
 
+    @parametrize_test("antialias", [True, False])
+    @parametrize_test("num_channels", [3, 5])
+    @parametrize_test("mode", ["nearest", "nearest-exact", "bilinear", "bicubic"])
+    @parametrize_test("dtype", integral_types() + floating_types())
+    @onlyNativeDeviceTypes
+    def test_upsamplingBiMode2d_nonsupported_dtypes(self, device, antialias, num_channels, mode, dtype):
+        x = torch.ones(1, num_channels, 32, 32, dtype=dtype, device=device)
+
+        should_raise_runtime_error = True
+
+        if "nearest" in mode:
+            if antialias:
+                raise SkipTest("Nearest mode does not have antialiasing")
+            if dtype in (torch.uint8, ) + floating_types():
+                should_raise_runtime_error = False
+
+        elif mode == "bilinear":
+            if dtype in floating_types() or (device == "cpu" and dtype == torch.uint8):
+                should_raise_runtime_error = False
+        elif mode == "bicubic":
+            if dtype in floating_types():
+                should_raise_runtime_error = False
+
+        if should_raise_runtime_error:
+            with self.assertRaisesRegex(RuntimeError, "not implemented for"):
+                F.interpolate(x, (12, 12), mode=mode, antialias=antialias)
+        else:
+            _ = F.interpolate(x, (12, 12), mode=mode, antialias=antialias)
+
     @parametrize_test("memory_format", [torch.contiguous_format, torch.channels_last])
     def test_upsamplingBilinear2d_aa_correctness(self, device, memory_format):
         t_in = torch.arange(3 * 8 * 8, dtype=torch.float, device=device).reshape(1, 3, 8, 8)