Demote(B)Float16 pass: only keep enabled for PPC. (#55486)

maleadt · web-flow · commit faa6095e983f · 2024-08-17T17:00:26.000+02:00
LLVM should handle this properly now for everything but PPC (where
BFloat16 isn't supported anyway).
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
@@ -49,37 +49,28 @@ extern JuliaOJIT *jl_ExecutionEngine;
 
 namespace {
 
-static bool have_fp16(Function &caller, const Triple &TT) {
-    Attribute FSAttr = caller.getFnAttribute("target-features");
-    StringRef FS = "";
-    if (FSAttr.isValid())
-        FS = FSAttr.getValueAsString();
-    else if (jl_ExecutionEngine)
-        FS = jl_ExecutionEngine->getTargetFeatureString();
-    // else probably called from opt, just do nothing
-    if (TT.isAArch64()) {
-        if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
-            return true;
-        }
-    } else if (TT.getArch() == Triple::x86_64) {
-        if (FS.find("+avx512fp16") != llvm::StringRef::npos){
-            return true;
-        }
-    }
-    if (caller.hasFnAttribute("julia.hasfp16")) {
-        return true;
-    }
-    return false;
+static bool have_fp16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasfp16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
+
+    // llvm/llvm-project#97975: on some platforms, `half` uses excessive precision
+    if (TT.isPPC())
+        return false;
+
+    return true;
 }
 
-static bool have_bf16(Function &caller, const Triple &TT) {
-    if (caller.hasFnAttribute("julia.hasbf16")) {
-        return true;
-    }
+static bool have_bf16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasbf16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
 
-    // there's no targets that fully support bfloat yet;,
-    // AVX512BF16 only provides conversion and dot product instructions.
-    return false;
+    // https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218770199:
+    // on current versions of LLVM, bf16 always uses TypeSoftPromoteHalf
+    return true;
 }
 
 static bool demoteFloat16(Function &F)
diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl
@@ -16,29 +16,3 @@ import Base.FastMath
 
 # CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32")
 emit(FastMath.sqrt_fast, Float32)
-
-
-# Float16 operations should be performed as Float32, unless @fastmath is specified
-# TODO: this is not true for platforms that natively support Float16
-
-foo(x::T,y::T) where T = x-y == zero(T)
-# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{
-# CHECK-DAG: %[[XEXT:[0-9]+]] = fpext half %[[X]] to float
-# CHECK-DAG: %[[YEXT:[0-9]+]] = fpext half %[[Y]] to float
-# CHECK: %[[DIFF:[0-9]+]] = fsub float %[[XEXT]], %[[YEXT]]
-# CHECK: %[[TRUNC:[0-9]+]] = fptrunc float %[[DIFF]] to half
-# CHECK: %[[DIFFEXT:[0-9]+]] = fpext half %[[TRUNC]] to float
-# CHECK: %[[CMP:[0-9]+]] = fcmp oeq float %[[DIFFEXT]], 0.000000e+00
-# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8
-# CHECK: ret i8 %[[ZEXT]]
-# CHECK: }
-emit(foo, Float16, Float16)
-
-@fastmath foo(x::T,y::T) where T = x-y == zero(T)
-# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{
-# CHECK: %[[DIFF:[0-9]+]] = fsub fast half %[[X]], %[[Y]]
-# CHECK: %[[CMP:[0-9]+]] = fcmp fast oeq half %[[DIFF]], 0xH0000
-# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8
-# CHECK: ret i8 %[[ZEXT]]
-# CHECK: }
-emit(foo, Float16, Float16)
diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll
@@ -99,7 +99,7 @@ top:
   ret half %13
 }
 
-define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) {
+define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) #2 {
 top:
 ; CHECK-LABEL: @demote_bfloat_test(
 ; CHECK-NEXT:  top:
@@ -160,5 +160,70 @@ top:
   ret bfloat %13
 }
 
-attributes #0 = { "target-features"="-avx512fp16" }
-attributes #1 = { "target-features"="+avx512fp16" }
+define bfloat @native_bfloat_test(bfloat %a, bfloat %b) #3 {
+top:
+; CHECK-LABEL: @native_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %0 = fadd bfloat %a, %b
+; CHECK-NEXT:    %1 = fadd bfloat %0, %b
+; CHECK-NEXT:    %2 = fadd bfloat %1, %b
+; CHECK-NEXT:    %3 = fmul bfloat %2, %b
+; CHECK-NEXT:    %4 = fdiv bfloat %3, %b
+; CHECK-NEXT:    %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+; CHECK-NEXT:    %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+; CHECK-NEXT:    %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+; CHECK-NEXT:    %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+; CHECK-NEXT:    %9 = fadd <2 x bfloat> %6, %8
+; CHECK-NEXT:    %10 = extractelement <2 x bfloat> %9, i32 0
+; CHECK-NEXT:    %11 = extractelement <2 x bfloat> %9, i32 1
+; CHECK-NEXT:    %12 = fadd bfloat %10, %11
+; CHECK-NEXT:    %13 = fadd bfloat %12, %4
+; CHECK-NEXT:    ret bfloat %13
+;
+  %0 = fadd bfloat %a, %b
+  %1 = fadd bfloat %0, %b
+  %2 = fadd bfloat %1, %b
+  %3 = fmul bfloat %2, %b
+  %4 = fdiv bfloat %3, %b
+  %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+  %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+  %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+  %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+  %9 = fadd <2 x bfloat> %6, %8
+  %10 = extractelement <2 x bfloat> %9, i32 0
+  %11 = extractelement <2 x bfloat> %9, i32 1
+  %12 = fadd bfloat %10, %11
+  %13 = fadd bfloat %12, %4
+  ret bfloat %13
+}
+
+define i1 @fast_half_test(half %0, half %1) #0 {
+top:
+; CHECK-LABEL: @fast_half_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast half %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq half %2, 0xH0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast half %0, %1
+  %3 = fcmp fast oeq half %2, 0xH0000
+  ret i1 %3
+}
+
+define i1 @fast_bfloat_test(bfloat %0, bfloat %1) #2 {
+top:
+; CHECK-LABEL: @fast_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast bfloat %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq bfloat %2, 0xR0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast bfloat %0, %1
+  %3 = fcmp fast oeq bfloat %2, 0xR0000
+  ret i1 %3
+}
+
+attributes #0 = { "julia.hasfp16"="false" }
+attributes #1 = { "julia.hasfp16"="true" }
+attributes #2 = { "julia.hasbf16"="false" }
+attributes #3 = { "julia.hasbf16"="true" }