
Commit 3bf4e4b

[Torch] Fold aten.to.dtype on splat constants.
This commit teaches `AtenToDtypeOp::fold` to constant-fold dtype conversions when the operand is a splat `DenseElementsAttr`. Folding follows Torch's rounding behavior:

* Bool: 0 and -0.0 → false; nonzero/NaN/±Inf → true.
* Float → Int: round toward zero.
* Int → Float: sign-aware, rmNearestTiesToEven.
* Float ↔ Float: convert using the builtin `mlir::FloatType::getFloatSemantics()`.
* Int ↔ Int: `zextOrTrunc` / `sextOrTrunc` based on source signedness.

Folding is only performed when `non_blocking == false`, `copy == false`, and `memory_format` is None.
1 parent 4f572c5 commit 3bf4e4b
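
The rounding rules above map directly onto LLVM's APFloat/APSInt APIs, which are the same calls the fold below relies on. As a rough standalone sketch — not part of this commit, with values, variable names, and printed results chosen purely for illustration — the conversions behave like this:

// Illustrative sketch of the conversion rules, using LLVM's APFloat/APSInt
// directly rather than the folder itself.
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // Float -> int rounds toward zero: 3.14159 -> 3 (and -2.9 would give -2).
  APFloat pi(3.14159f);
  APSInt toInt(/*BitWidth=*/32, /*isUnsigned=*/false);
  bool isExact = false;
  pi.convertToInteger(toInt, APFloat::rmTowardZero, &isExact);
  outs() << "float->int32: " << toInt.getSExtValue() << "\n"; // 3

  // Int -> float is sign-aware and uses rmNearestTiesToEven.
  APFloat fromInt(APFloat::IEEEdouble());
  fromInt.convertFromAPInt(APInt(32, -1000, /*isSigned=*/true),
                           /*IsSigned=*/true, APFloat::rmNearestTiesToEven);
  outs() << "int32->f64: " << fromInt.convertToDouble() << "\n"; // -1000.0

  // Any -> bool: +/-0.0 is false; any nonzero value (NaN and +/-Inf too) is true.
  outs() << "float->bool: " << !pi.isZero() << "\n"; // 1
  return 0;
}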

3 files changed: 160 additions, 21 deletions

lib/Dialect/Torch/IR/TorchOps.cpp

Lines changed: 83 additions & 10 deletions
@@ -892,26 +892,99 @@ OpFoldResult AtenToDtypeOp::fold(FoldAdaptor adaptor) {
   // The non_blocking arg must be `False`.
   if (!matchPattern(getNonBlocking(), m_TorchConstantBool(&nonBlocking)) ||
       nonBlocking)
-    return nullptr;
+    return {};
   // The copy arg must be `False`.
   if (!matchPattern(getCopy(), m_TorchConstantBool(&copyArg)) || copyArg)
-    return nullptr;
+    return {};
   // The memory_format arg must be `none`.
   if (!isa<Torch::NoneType>(getMemoryFormat().getType()))
-    return nullptr;
+    return {};
 
   auto inputType = cast<BaseTensorType>(getSelf().getType());
   auto resType = cast<BaseTensorType>(getType());
-  // If the types aren't equal, then we can't fold.
-  if (inputType != resType)
-    return nullptr;
+
+  // Fold when both the input tensor and result are of the same type.
   // If the type does not have a statically known dtype, then we cannot fold.
   // For example, folding `tensor<*,unk>` to `tensor<*,unk>` would be wrong,
   // since the `unk` could be dynamically different for the operand and result.
-  if (!inputType.hasDtype())
-    return nullptr;
-  // Fold when both the input tensor and result are of the same type.
-  return getOperand(0);
+  if (inputType == resType && inputType.hasDtype())
+    return getOperand(0);
+
+  // Fold conversion of splat values.
+  auto elems = dyn_cast_or_null<DenseElementsAttr>(adaptor.getSelf());
+  if (!elems || !elems.isSplat())
+    return {};
+
+  auto outVTy = dyn_cast<ValueTensorType>(getType());
+  if (!outVTy)
+    return {};
+
+  auto outShaped = outVTy.toBuiltinTensor();
+  if (!outShaped.hasStaticShape())
+    return {};
+
+  Type srcEltTy = inputType.getDtype();
+  Type dstEltTy = outVTy.getDtype();
+
+  // Handle integer destination.
+  if (auto dstI = dyn_cast<IntegerType>(dstEltTy)) {
+    // any -> bool(i1).
+    if (dstI.isSignlessInteger(1)) {
+      bool truthy = false;
+      if (isa<mlir::FloatType>(srcEltTy)) {
+        const APFloat &floatVal = elems.getSplatValue<APFloat>();
+        truthy = !floatVal.isZero();
+      } else {
+        const APInt &intVal = elems.getSplatValue<APInt>();
+        truthy = !intVal.isZero();
+      }
+      return DenseElementsAttr::get(outShaped, APInt(/*numBits=*/1, truthy));
+    }
+    // float -> intN
+    if (auto srcF = dyn_cast<mlir::FloatType>(srcEltTy)) {
+      APSInt result(dstI.getWidth(), /*isUnsigned=*/dstI.isUnsignedInteger());
+      bool isExact = false;
+      APFloat f = elems.getSplatValue<APFloat>();
+      APFloat::opStatus st =
+          f.convertToInteger(result, APFloat::rmTowardZero, &isExact);
+      if (st == APFloat::opOK || st == APFloat::opInexact)
+        return DenseElementsAttr::get(outShaped, APInt(result));
+      return {}; // NaN/Inf/out-of-range: preserve runtime semantics.
+    }
+    // intM -> intN
+    const APInt &v = elems.getSplatValue<APInt>();
+    APInt casted = cast<IntegerType>(srcEltTy).isUnsignedInteger()
+                       ? v.zextOrTrunc(dstI.getWidth())
+                       : v.sextOrTrunc(dstI.getWidth());
+    return DenseElementsAttr::get(outShaped, casted);
+  }
+
+  // Handle float destination.
+  if (auto dstF = dyn_cast<mlir::FloatType>(dstEltTy)) {
+    const llvm::fltSemantics &dstSem = dstF.getFloatSemantics();
+
+    // int -> float
+    if (auto srcI = dyn_cast<IntegerType>(srcEltTy)) {
+      APFloat f(dstSem);
+      APFloat::opStatus st = f.convertFromAPInt(
+          elems.getSplatValue<APInt>(),
+          /*isSigned=*/!srcI.isUnsignedInteger(), APFloat::rmNearestTiesToEven);
+      if (st == APFloat::opOK || st == APFloat::opInexact)
+        return DenseElementsAttr::get(outShaped, f);
+      return {};
+    }
+
+    // floatX -> floatY
+    APFloat f = elems.getSplatValue<APFloat>();
+    bool losesInfo = false;
+    APFloat::opStatus st =
+        f.convert(dstSem, APFloat::rmNearestTiesToEven, &losesInfo);
+    if (st == APFloat::opOK || st == APFloat::opInexact)
+      return DenseElementsAttr::get(outShaped, f);
+    return {};
+  }
+
+  return {};
 }
 
 //===----------------------------------------------------------------------===//

test/Dialect/Torch/canonicalize.mlir

Lines changed: 72 additions & 0 deletions
@@ -1762,6 +1762,78 @@ func.func @torch.aten.to.dtype$no_fold$unk_dtype(%arg0: !torch.tensor) -> !torch
   return %0 : !torch.tensor
 }
 
+// CHECK-LABEL: @torch.aten.to.dtype$fold_splat(
+func.func @torch.aten.to.dtype$fold_splat() -> (!torch.vtensor<[2,3],f32>, !torch.vtensor<[4,4],si32>, !torch.vtensor<[10],si32>, !torch.vtensor<[5,5],f64>, !torch.vtensor<[3,3],f16>, !torch.vtensor<[2,2],bf16>, !torch.vtensor<[4],si64>, !torch.vtensor<[3],si16>) {
+  %false = torch.constant.bool false
+  %none = torch.constant.none
+
+  // int32 splat → float32
+  %int_splat = torch.vtensor.literal(dense<42> : tensor<2x3xsi32>) : !torch.vtensor<[2,3],si32>
+  %int6 = torch.constant.int 6 // torch.float32
+  // CHECK: %[[R1:.*]] = torch.vtensor.literal({{.*}} : tensor<2x3xf32>) : !torch.vtensor<[2,3],f32>
+  %result1 = torch.aten.to.dtype %int_splat, %int6, %false, %false, %none
+      : !torch.vtensor<[2,3],si32>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[2,3],f32>
+
+  // float32 splat → int32 (rmTowardZero)
+  %float_splat = torch.vtensor.literal(dense<3.14159> : tensor<4x4xf32>) : !torch.vtensor<[4,4],f32>
+  %int3 = torch.constant.int 3 // torch.int32
+  // CHECK: %[[R2:.*]] = torch.vtensor.literal(dense<3> : tensor<4x4xsi32>) : !torch.vtensor<[4,4],si32>
+  %result2 = torch.aten.to.dtype %float_splat, %int3, %false, %false, %none
+      : !torch.vtensor<[4,4],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[4,4],si32>
+
+  // int64 splat (max int32) → int32 (trunc)
+  %int64_splat = torch.vtensor.literal(dense<2147483647> : tensor<10xsi64>) : !torch.vtensor<[10],si64>
+  // CHECK: %[[R3:.*]] = torch.vtensor.literal(dense<2147483647> : tensor<10xsi32>) : !torch.vtensor<[10],si32>
+  %result3 = torch.aten.to.dtype %int64_splat, %int3, %false, %false, %none
+      : !torch.vtensor<[10],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[10],si32>
+
+  // float32 splat → float64
+  %float32_splat = torch.vtensor.literal(dense<2.71828> : tensor<5x5xf32>) : !torch.vtensor<[5,5],f32>
+  %int7 = torch.constant.int 7 // torch.float64
+  // CHECK: %[[R4:.*]] = torch.vtensor.literal({{.*}} : tensor<5x5xf64>) : !torch.vtensor<[5,5],f64>
+  %result4 = torch.aten.to.dtype %float32_splat, %int7, %false, %false, %none
+      : !torch.vtensor<[5,5],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[5,5],f64>
+
+  // float64 splat → float16
+  %float64_splat = torch.vtensor.literal(dense<1.23456> : tensor<3x3xf64>) : !torch.vtensor<[3,3],f64>
+  %int5 = torch.constant.int 5 // torch.float16
+  // CHECK: %[[R5:.*]] = torch.vtensor.literal({{.*}} : tensor<3x3xf16>) : !torch.vtensor<[3,3],f16>
+  %result5 = torch.aten.to.dtype %float64_splat, %int5, %false, %false, %none
+      : !torch.vtensor<[3,3],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[3,3],f16>
+
+  // float32 splat → bfloat16
+  %float32_bf16 = torch.vtensor.literal(dense<-0.5> : tensor<2x2xf32>) : !torch.vtensor<[2,2],f32>
+  %int15 = torch.constant.int 15 // torch.bfloat16
+  // CHECK: %[[R6:.*]] = torch.vtensor.literal({{.*}} : tensor<2x2xbf16>) : !torch.vtensor<[2,2],bf16>
+  %result6 = torch.aten.to.dtype %float32_bf16, %int15, %false, %false, %none
+      : !torch.vtensor<[2,2],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[2,2],bf16>
+
+  // int32 splat → int64 (sign-extend)
+  %int32_ext = torch.vtensor.literal(dense<-1000> : tensor<4xsi32>) : !torch.vtensor<[4],si32>
+  %int4 = torch.constant.int 4 // torch.int64
+  // CHECK: %[[R7:.*]] = torch.vtensor.literal(dense<-1000> : tensor<4xsi64>) : !torch.vtensor<[4],si64>
+  %result7 = torch.aten.to.dtype %int32_ext, %int4, %false, %false, %none
+      : !torch.vtensor<[4],si32>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[4],si64>
+
+  // int32 splat → int16 (trunc)
+  %int32_trunc = torch.vtensor.literal(dense<32000> : tensor<3xsi32>) : !torch.vtensor<[3],si32>
+  %int2 = torch.constant.int 2 // torch.int16
+  // CHECK: %[[R8:.*]] = torch.vtensor.literal(dense<32000> : tensor<3xsi16>) : !torch.vtensor<[3],si16>
+  %result8 = torch.aten.to.dtype %int32_trunc, %int2, %false, %false, %none
+      : !torch.vtensor<[3],si32>, !torch.int, !torch.bool, !torch.bool, !torch.none
+      -> !torch.vtensor<[3],si16>
+
+  return %result1, %result2, %result3, %result4, %result5, %result6, %result7, %result8
+      : !torch.vtensor<[2,3],f32>, !torch.vtensor<[4,4],si32>, !torch.vtensor<[10],si32>, !torch.vtensor<[5,5],f64>, !torch.vtensor<[3,3],f16>, !torch.vtensor<[2,2],bf16>, !torch.vtensor<[4],si64>, !torch.vtensor<[3],si16>
+}
+
 // CHECK-LABEL: func.func @torch.aten.to.other$basic(
 // CHECK-SAME: %[[ARG_0:.*]]: !torch.tensor, %[[ARG_1:.*]]: !torch.tensor) -> !torch.tensor {
 // CHECK: %[[NONE:.*]] = torch.constant.none

test/Dialect/Torch/decompose-complex-ops.mlir

Lines changed: 5 additions & 11 deletions
@@ -159,21 +159,15 @@ func.func @torch.aten.fmod_int(%arg0: !torch.vtensor<[?],si32>, %arg1: !torch.vt
 
 // CHECK: func.func @torch.aten.fmod_float(%[[ARG0:.+]]: !torch.vtensor<[?],f16>, %[[ARG1:.+]]: !torch.vtensor<[1],f16>) -> !torch.vtensor<[?],f16> {
 // CHECK: %[[FLOAT1:.+]] = torch.constant.float 1.000000e+00
-// CHECK: %[[V0:.+]] = torch.vtensor.literal(dense<-1> : tensor<si64>) : !torch.vtensor<[],si64>
-// CHECK: %[[V1:.+]] = torch.vtensor.literal(dense<0> : tensor<si64>) : !torch.vtensor<[],si64>
-// CHECK: %[[NONE:.+]] = torch.constant.none
-// CHECK: %[[FALSE:.+]] = torch.constant.bool false
-// CHECK: %[[INT5:.+]] = torch.constant.int 5
-// CHECK: %[[V2:.+]] = torch.vtensor.literal(dense<1> : tensor<si64>) : !torch.vtensor<[],si64>
+// CHECK: %[[V0:.+]] = torch.vtensor.literal(dense<-1.0{{.*}}> : tensor<f16>) : !torch.vtensor<[],f16>
+// CHECK: %[[V1:.+]] = torch.vtensor.literal(dense<0.0{{.*}}> : tensor<f16>) : !torch.vtensor<[],f16>
+// CHECK: %[[V2:.+]] = torch.vtensor.literal(dense<1.0{{.*}}> : tensor<f16>) : !torch.vtensor<[],f16>
 // CHECK: %[[INT0:.+]] = torch.constant.int 0
 // CHECK: %[[V3:.+]] = torch.aten.div.Tensor %[[ARG0]], %[[ARG1]] : !torch.vtensor<[?],f16>, !torch.vtensor<[1],f16> -> !torch.vtensor<[?],f16>
 // CHECK: %[[V4:.+]] = torch.aten.gt.Scalar %[[V3]], %[[INT0]] : !torch.vtensor<[?],f16>, !torch.int -> !torch.vtensor<[?],i1>
 // CHECK: %[[V5:.+]] = torch.aten.lt.Scalar %[[V3]], %[[INT0]] : !torch.vtensor<[?],f16>, !torch.int -> !torch.vtensor<[?],i1>
-// CHECK: %[[V6:.+]] = torch.aten.to.dtype %[[V2]], %[[INT5]], %[[FALSE]], %[[FALSE]], %[[NONE]] : !torch.vtensor<[],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
-// CHECK: %[[V7:.+]] = torch.aten.to.dtype %[[V1]], %[[INT5]], %[[FALSE]], %[[FALSE]], %[[NONE]] : !torch.vtensor<[],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
-// CHECK: %[[V8:.+]] = torch.aten.where.self %[[V4]], %[[V6]], %[[V7]] : !torch.vtensor<[?],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[?],f16>
-// CHECK: %[[V9:.+]] = torch.aten.to.dtype %[[V0]], %[[INT5]], %[[FALSE]], %[[FALSE]], %[[NONE]] : !torch.vtensor<[],si64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[],f16>
-// CHECK: %[[V10:.+]] = torch.aten.where.self %[[V5]], %[[V9]], %[[V8]] : !torch.vtensor<[?],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[?],f16> -> !torch.vtensor<[?],f16>
+// CHECK: %[[V8:.+]] = torch.aten.where.self %[[V4]], %[[V2]], %[[V1]] : !torch.vtensor<[?],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[],f16> -> !torch.vtensor<[?],f16>
+// CHECK: %[[V10:.+]] = torch.aten.where.self %[[V5]], %[[V0]], %[[V8]] : !torch.vtensor<[?],i1>, !torch.vtensor<[],f16>, !torch.vtensor<[?],f16> -> !torch.vtensor<[?],f16>
 // CHECK: %[[V11:.+]] = torch.aten.abs %[[V3]] : !torch.vtensor<[?],f16> -> !torch.vtensor<[?],f16>
 // CHECK: %[[V12:.+]] = torch.aten.floor %[[V11]] : !torch.vtensor<[?],f16> -> !torch.vtensor<[?],f16>
 // CHECK: %[[V13:.+]] = torch.aten.mul.Tensor %[[V10]], %[[V12]] : !torch.vtensor<[?],f16>, !torch.vtensor<[?],f16> -> !torch.vtensor<[?],f16>
