From 2c72a82e60dfbedfdccf6c4c77140bf61ec7a597 Mon Sep 17 00:00:00 2001
From: Chi_Liu <22491986+AmosLewis@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:19:00 -0800
Subject: [PATCH] [ONNX] Fix nonzero output type difference between onnx and
 torch (#3916)

The ONNX NonZero output tensor has shape (n, z), where n is the number
of dimensions of the input tensor and z is the number of non-zero
elements. This differs from PyTorch's default behavior, where the two
dimensions are reversed, i.e. the result has shape (z, n).
---
 .../TorchOnnxToTorch/DefaultDomainGtoP.cpp    | 41 +++++++++++++------
 .../TorchOnnxToTorch/simple_ops_g_to_p.mlir   | 14 ++++---
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp b/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
index 7446b7faaa08..13f555c146b4 100644
--- a/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
+++ b/lib/Conversion/TorchOnnxToTorch/DefaultDomainGtoP.cpp
@@ -1093,18 +1093,35 @@ void mlir::torch::onnx_c::populateDefaultDomainGtoP(
         rewriter.replaceOp(binder.op, nllLoss);
         return success();
       });
-  patterns.onOp("NonZero", 13,
-                [](OpBinder binder, ConversionPatternRewriter &rewriter) {
-                  Torch::ValueTensorType resultType;
-                  Value operand;
-                  if (binder.tensorOperand(operand) ||
-                      binder.tensorResultType(resultType)) {
-                    return failure();
-                  }
-                  rewriter.replaceOpWithNewOp<Torch::AtenNonzeroOp>(
-                      binder.op, resultType, operand);
-                  return success();
-                });
+  patterns.onOp(
+      "NonZero", 13, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
+        Torch::ValueTensorType resultType;
+        Value operand;
+        if (binder.tensorOperand(operand) ||
+            binder.tensorResultType(resultType)) {
+          return failure();
+        }
+        Value zero = rewriter.create<Torch::ConstantIntOp>(
+            binder.getLoc(), rewriter.getType<Torch::IntType>(),
+            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 0));
+        Value one = rewriter.create<Torch::ConstantIntOp>(
+            binder.getLoc(), rewriter.getType<Torch::IntType>(),
+            rewriter.getIntegerAttr(rewriter.getIntegerType(64), 1));
+        auto rawSize = resultType.getSizes();
+        SmallVector<int64_t> torchResultSize(rawSize.rbegin(), rawSize.rend());
+        auto torchResultType = rewriter.getType<Torch::ValueTensorType>(
+            torchResultSize, resultType.getDtype());
+        auto nonZero = rewriter.create<Torch::AtenNonzeroOp>(
+            binder.getLoc(), torchResultType, operand);
+        // ONNX NonZero returns indices with shape (n, z), where n is the
+        // number of dimensions of the input tensor and z is the number of
+        // non-zero elements. PyTorch's default is the reverse, (z, n), so
+        // aten.nonzero is created with the reversed result sizes and its
+        // output is transposed (dims 0 and 1) back to the ONNX layout.
+        rewriter.replaceOpWithNewOp<Torch::AtenTransposeIntOp>(
+            binder.op, resultType, nonZero, zero, one);
+        return success();
+      });
   patterns.onOp(
       "MaxPool", 12, [](OpBinder binder, ConversionPatternRewriter &rewriter) {
         std::string autoPad;
diff --git a/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir b/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
index 5a5fb83d5fc0..7f1e63d83ccd 100644
--- a/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
+++ b/test/Conversion/TorchOnnxToTorch/simple_ops_g_to_p.mlir
@@ -1580,12 +1580,14 @@ func.func @test_nllloss_iii_reduction_none_ignore_negative(%arg0: !torch.vtensor
 
 // -----
 
-// CHECK-LABEL: func.func @test_nonzero
-  func.func @test_nonzero(%arg0: !torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5],si64> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 13 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
-    // CHECK: torch.aten.nonzero %arg0 : !torch.vtensor<[3,4,5],f32> -> !torch.vtensor<[3,4,5],si64>
-    %0 = torch.operator "onnx.NonZero"(%arg0) : (!torch.vtensor<[3,4,5],f32>) -> !torch.vtensor<[3,4,5],si64>
-    return %0 : !torch.vtensor<[3,4,5],si64>
-  }
+func.func @test_nonzero(%arg0: !torch.vtensor<[?],f32>) -> !torch.vtensor<[1,?],si64> attributes {torch.onnx_meta.ir_version = 7 : si64, torch.onnx_meta.opset_version = 13 : si64, torch.onnx_meta.producer_name = "backend-test", torch.onnx_meta.producer_version = ""} {
+  // CHECK: %[[ZERO:.*]] = torch.constant.int 0
+  // CHECK: %[[ONE:.*]] = torch.constant.int 1
+  // CHECK: %[[NONZERO:.*]] = torch.aten.nonzero %arg0 : !torch.vtensor<[?],f32> -> !torch.vtensor<[?,1],si64>
+  // CHECK: %[[TRANSPOSE:.*]] = torch.aten.transpose.int %[[NONZERO]], %[[ZERO]], %[[ONE]] : !torch.vtensor<[?,1],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64>
+  %0 = torch.operator "onnx.NonZero"(%arg0) : (!torch.vtensor<[?],f32>) -> !torch.vtensor<[1,?],si64>
+  return %0 : !torch.vtensor<[1,?],si64>
+}
 
 // -----