Skip to content

Commit 7c4efe2

Browse files
Andrew Grebenisan authored
and facebook-github-bot committed
Update channels last python reference to not use memory_format=channels_last (#14035)
Summary: The default overload of custom channels last assumes that inputs and weights are permuted and contiguous in memory. Differential Revision: D81842686
1 parent cb943e6 commit 7c4efe2

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

backends/cadence/aot/ref_implementations.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,12 @@ def quantized_conv_nhwc_per_tensor(
457457
- out_multiplier (int): Unused
458458
- out_shift (int): Unused
459459
"""
460-
461-
if not input_tensor.is_contiguous(memory_format=torch.channels_last):
462-
raise ValueError("Input tensor must be in NHWC format")
460+
assert input_tensor.is_contiguous(memory_format=torch.contiguous_format)
461+
assert weight.is_contiguous(memory_format=torch.contiguous_format)
462+
input_tensor = torch.permute(input_tensor, (0, -1, 1, 2)).to(
463+
memory_format=torch.channels_last
464+
)
465+
weight = torch.permute(weight, (0, -1, 1, 2))
463466

464467
return quantized_conv_per_tensor(
465468
input_tensor,

backends/cadence/aot/tests/test_ref_implementations.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,9 @@ def test_quantized_conv_per_tensor(
689689
if len(input_tensor.shape) == 3 and memory_format == torch.channels_last:
690690
self.fail("Channels last format is not supported for 3D input tensors")
691691

692-
input_tensor = input_tensor.to(memory_format=memory_format)
692+
if memory_format == torch.channels_last:
693+
input_tensor = torch.permute(input_tensor, (0, 2, 3, 1)).contiguous()
694+
weight = torch.permute(weight, (0, 2, 3, 1)).contiguous()
693695

694696
convs = [
695697
(
@@ -701,7 +703,7 @@ def test_quantized_conv_per_tensor(
701703

702704
optimized_convs = []
703705
if input_tensor.dtype == torch.int8 and weight.dtype == torch.int8:
704-
if input_tensor.is_contiguous(memory_format=torch.contiguous_format):
706+
if memory_format == torch.contiguous_format:
705707
optimized_convs = [
706708
torch.ops.cadence.quantized_conv_nchw_asym8sxsym8s_asym8s.per_tensor,
707709
torch.ops.cadence.quantized_conv_nchw_dilated_asym8sxsym8s_asym8s.per_tensor,
@@ -715,7 +717,7 @@ def test_quantized_conv_per_tensor(
715717
torch.ops.cadence.quantized_conv_nhwc_depthwise_asym8sxsym8s_asym8s.per_tensor,
716718
]
717719
elif input_tensor.dtype == torch.uint8 and weight.dtype == torch.uint8:
718-
if input_tensor.is_contiguous(memory_format=torch.contiguous_format):
720+
if memory_format == torch.contiguous_format:
719721
optimized_convs = [
720722
torch.ops.cadence.quantized_conv_nchw_asym8uxsym8u_asym8u.per_tensor,
721723
torch.ops.cadence.quantized_conv_nchw_dilated_asym8uxsym8u_asym8u.per_tensor,

0 commit comments

Comments
 (0)