[Mosaic] Move sitofp lowering to Mosaic.

WindQAQ · Google-ML-Automation · commit 9eb8c9eacbda · 2025-05-09T10:22:11.000-07:00
Also, the compatibility check in fptosi is likely wrong - we should check if both src and dst bit widths &lt; 32, not just dst. Correct it while I'm here.

PiperOrigin-RevId: 756816853
diff --git a/jax/_src/pallas/mosaic/lowering.py b/jax/_src/pallas/mosaic/lowering.py
@@ -2177,11 +2177,14 @@ def _convert_element_type_lowering_rule(
     elif jnp.iinfo(old_dtype).bits == jnp.iinfo(new_dtype).bits:
       # This case triggers when casting signed to unsigned or vice versa.
       return x
-  # TODO(apaszke): Remove both_32bit constraints using the Mosaic canonicalizer.
   elif _from(floating) and _to(signed):
     return arith.fptosi(out_type, x)
-  elif _from(signed) and _to(floating) and both_32bit:
-    return arith.sitofp(out_type, x)
+  elif _from(signed) and _to(floating):
+    if (
+        not (ctx.forward_compatible or is_cloud_tpu_older_than(2025, 5, 12))
+        or both_32bit
+    ):
+      return arith.sitofp(out_type, x)
   elif old_dtype == jnp.bool_ and _to(integer) and new_dtype.itemsize == 4:
     return arith.extui(out_type, x)
   return lower_fun(functools.partial(_convert_helper, to_dtype=new_dtype),
diff --git a/jaxlib/mosaic/dialect/tpu/transforms/canonicalize_mosaic.cc b/jaxlib/mosaic/dialect/tpu/transforms/canonicalize_mosaic.cc
@@ -44,13 +44,13 @@ limitations under the License.
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
 #include "mlir/IR/Region.h"
 #include "mlir/IR/Value.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "jaxlib/mosaic/dialect/tpu/tpu_dialect.h"
+#include "jaxlib/mosaic/dialect/tpu/util.h"
 #include "jaxlib/mosaic/dialect/tpu/vreg_util.h"
 
 namespace mlir::tpu {
@@ -601,14 +601,10 @@ LogicalResult canonicalize_fptosi(const CanonicalizeContext &ctx,
     return op.emitOpError("Vector/scalar mismatch between input and output");
   }
   bool is_vector = static_cast<bool>(src_vty);
-  unsigned src_bitwidth, dst_bitwidth;
-  if (is_vector) {
-    src_bitwidth = src_vty.getElementTypeBitWidth();
-    dst_bitwidth = dst_vty.getElementTypeBitWidth();
-  } else {
-    src_bitwidth = op.getIn().getType().getIntOrFloatBitWidth();
-    dst_bitwidth = op.getType().getIntOrFloatBitWidth();
-  }
+  FAILUREOR_ASSIGN_OR_RETURN(const unsigned src_bitwidth,
+                             getElementTypeBitwidth(op.getIn().getType()));
+  FAILUREOR_ASSIGN_OR_RETURN(const unsigned dst_bitwidth,
+                             getElementTypeBitwidth(op.getType()));
   if (dst_bitwidth > 32) {
     return op.emitOpError("Target bitwidth too large");
   }
@@ -623,6 +619,14 @@ LogicalResult canonicalize_fptosi(const CanonicalizeContext &ctx,
     op.erase();
     return success();
   }
+
+  if ((src_bitwidth < 32 || dst_bitwidth < 32) && !ctx.compatibility_mode) {
+    return op.emitOpError(
+        "On this target float-to-integer conversions can only happen on "
+        "32-bit values. Enable compatibility mode or upcast to float32, cast "
+        "to int32 and truncate to desired bitwidth.");
+  }
+
   Value x = op.getIn();
   // Upcast the input to f32.
   if (src_bitwidth < 32) {
@@ -634,11 +638,6 @@ LogicalResult canonicalize_fptosi(const CanonicalizeContext &ctx,
     }
   }
   if (dst_bitwidth < 32) {
-    if (!ctx.compatibility_mode) {
-      return op.emitOpError(
-          "On this target only float-to-integer conversions can only happen on "
-          "32-bit values. Enable compatibility mode or upcast to float32.");
-    }
     // Need to clip values to match XLA
     auto clip = [&](Value x, Value low, Value high) {
       x = builder.create<arith::MaximumFOp>(x, low);
@@ -666,19 +665,59 @@ LogicalResult canonicalize_fptosi(const CanonicalizeContext &ctx,
     x = builder.create<arith::FPToSIOp>(builder.getI32Type(), x);
   }
   if (dst_bitwidth < 32) {
-    if (!ctx.compatibility_mode) {
-      return op.emitOpError(
-          "On this target only float-to-integer conversions can only happen on "
-          "32-bit values. Enable compatibility mode or cast to int32 and "
-          "truncate later.");
-    }
     x = builder.create<arith::TruncIOp>(op.getType(), x);
   }
   op.replaceAllUsesWith(x);
   op.erase();
   return success();
 }
 
+LogicalResult canonicalize_sitofp(const CanonicalizeContext &ctx,
+                                  Operation &raw_op) {
+  auto op = cast<arith::FPToSIOp>(raw_op);
+  ImplicitLocOpBuilder builder(op->getLoc(), op.getOperation());
+  auto src_vty = dyn_cast<VectorType>(op.getIn().getType());
+  auto dst_vty = dyn_cast<VectorType>(op.getType());
+  if (static_cast<bool>(src_vty) != static_cast<bool>(dst_vty)) {
+    return op.emitOpError("Vector/scalar mismatch between input and output");
+  }
+  bool is_vector = static_cast<bool>(src_vty);
+  FAILUREOR_ASSIGN_OR_RETURN(const unsigned src_bitwidth,
+                             getElementTypeBitwidth(op.getIn().getType()));
+  FAILUREOR_ASSIGN_OR_RETURN(const unsigned dst_bitwidth,
+                             getElementTypeBitwidth(op.getType()));
+
+  if ((src_bitwidth < 32 || dst_bitwidth < 32) && !ctx.compatibility_mode) {
+    return op.emitOpError(
+        "On this target integer-to-float conversions can only happen on "
+        "32-bit values. Enable compatibility mode or upcast to int32, cast to "
+        "float32 and truncate to desired bitwidth.");
+  }
+
+  // Canonicalize (intX -> floatY) to (intX -> int32 -> float32 -> floatY).
+  Value x = op.getIn();
+  if (src_bitwidth < 32) {
+    if (is_vector) {
+      x = builder.create<arith::ExtSIOp>(
+          VectorType::get(src_vty.getShape(), builder.getI32Type()), x);
+    } else {
+      x = builder.create<arith::ExtSIOp>(builder.getI32Type(), x);
+    }
+  }
+  if (is_vector) {
+    x = builder.create<arith::SIToFPOp>(
+        VectorType::get(src_vty.getShape(), builder.getF32Type()), x);
+  } else {
+    x = builder.create<arith::SIToFPOp>(builder.getF32Type(), x);
+  }
+  if (dst_bitwidth < 32) {
+    x = builder.create<arith::TruncFOp>(op.getType(), x);
+  }
+  op.replaceAllUsesWith(x);
+  op.erase();
+  return success();
+}
+
 LogicalResult canonicalize_repeat(const CanonicalizeContext &ctx,
                                   Operation &raw_op) {
   auto op = dyn_cast<tpu::RepeatOp>(raw_op);
@@ -727,6 +766,7 @@ const llvm::StringMap<canonicalize_rule_type> &rules() {
       {vector::TransposeOp::getOperationName(), canonicalize_vector_transpose},
       {arith::SelectOp::getOperationName(), canonicalize_select},
       {arith::FPToSIOp::getOperationName(), canonicalize_fptosi},
+      {arith::SIToFPOp::getOperationName(), canonicalize_sitofp},
       {tpu::RepeatOp::getOperationName(), canonicalize_repeat}};
   return *rules;
 }
diff --git a/jaxlib/mosaic/dialect/tpu/util.h b/jaxlib/mosaic/dialect/tpu/util.h
@@ -180,6 +180,16 @@ FailureOr<int8_t> getTypeBitwidth(Type ty) {
          << ty;
 }
 
+// Returns the bitwidth of the element type. The function works for both
+// scalar and vector types.
+template <bool adjust_bool = false>
+inline FailureOr<int8_t> getElementTypeBitwidth(Type ty) {
+  if (auto vty = dyn_cast<VectorType>(ty)) {
+    return getTypeBitwidth<adjust_bool>(vty.getElementType());
+  }
+  return getTypeBitwidth<adjust_bool>(ty);
+}
+
 template <typename T>
 ArrayRef<std::remove_const_t<T>> toArrayRef(absl::Span<T> span) {
   return ArrayRef<std::remove_const_t<T>>(span.data(), span.size());