csarofeen · naoyam · Feb 16, 2023 · Feb 16, 2023
diff --git a/third_party/nvfuser/csrc/codegen.cpp b/third_party/nvfuser/csrc/codegen.cpp
@@ -994,17 +994,22 @@ class CudaKernelGenerator : private OptOutConstDispatch {
       code_ << " = ";
     }
 
-    code_ << top->getTernaryOpType() << "(" << gen(top->in1()) << ", ";
-
-    // Make sure the two operands of where has the same
-    // type. Note that compiling "where(0.0f, 0.0)" fails because of
-    // the overloading ambiguity.
+    // Don't use a runtime device function for where, as the second and
+    // third arguments should not be evaluated unless picked by the
+    // condition. If a device function is implemented as pass-by-value,
+    // both arguments would be evaluated. This could be worked around by
+    // pass-by-reference, but it's simpler to use the C++ ?: operator.
     if (top->getTernaryOpType() == TernaryOpType::Where) {
+      code_ << gen(top->in1()) << " ? ";
+      // Make sure the two operands of where have the same
+      // type. Note that compiling "where(0.0f, 0.0)" fails because of
+      // the overloading ambiguity.
       auto cast = scalarCast(top->in2(), top->in3());
-      code_ << (top->in2()->isScalar() ? cast : "") << gen(top->in2()) << ", "
-            << (top->in3()->isScalar() ? cast : "") << gen(top->in3()) << ")";
+      code_ << (top->in2()->isScalar() ? cast : "") << gen(top->in2()) << " : "
+            << (top->in3()->isScalar() ? cast : "") << gen(top->in3());
     } else {
-      code_ << gen(top->in2()) << ", " << gen(top->in3()) << ")";
+      code_ << top->getTernaryOpType() << "(" << gen(top->in1()) << ", "
+            << gen(top->in2()) << ", " << gen(top->in3()) << ")";
     }
 
     if (!print_inline_) {

diff --git a/third_party/nvfuser/runtime/helpers.cu b/third_party/nvfuser/runtime/helpers.cu
@@ -275,20 +275,6 @@ __device__ float threshold(float x, double t, double v) {
   return x <= t ? v : x;
 }
 
-__device__ std::complex<double> where(
-    bool c,
-    std::complex<double> a,
-    std::complex<double> b) {
-  return c ? a : b;
-}
-
-__device__ std::complex<float> where(
-    bool c,
-    std::complex<float> a,
-    std::complex<float> b) {
-  return c ? a : b;
-}
-
 __device__ int threshold(int x, int64_t t, int64_t v) {
   return x <= t ? v : x;
 }
@@ -297,38 +283,6 @@ __device__ int64_t threshold(int64_t x, int64_t t, int64_t v) {
   return x <= t ? v : x;
 }
 
-__device__ double where(bool c, double a, double b) {
-  return c ? a : b;
-}
-
-__device__ float where(bool c, float a, float b) {
-  return c ? a : b;
-}
-
-__device__ __half where(bool c, __half a, __half b) {
-  return c ? a : b;
-}
-
-__device__ __bfloat where(bool c, __bfloat a, __bfloat b) {
-  return c ? a : b;
-}
-
-__device__ int64_t where(bool c, int64_t a, int64_t b) {
-  return c ? a : b;
-}
-
-__device__ int where(bool c, int a, int b) {
-  return c ? a : b;
-}
-
-__device__ int64_t where(bool c, int64_t a, int b) {
-  return c ? a : b;
-}
-
-__device__ int64_t where(bool c, int a, int64_t b) {
-  return c ? a : b;
-}
-
 __device__ constexpr int64_t remainder(int64_t a, int64_t b) {
   auto mod = a % b;
   if ((mod != 0) && ((b < 0) != (mod < 0)))