Skip to content

Commit de867e8

Browse files
committed
Guard truncate from vector float to vector __bf16 with !flag_rounding_math && !HONOR_NANS (BFmode).
The hw instruction doesn't raise exceptions, quietly turns sNaN into qNaN, and always rounds to nearest (even). Output denormals are always flushed to zero and input denormals are always treated as zero. MXCSR is neither consulted nor updated. Without native instructions, flag_unsafe_math_optimizations is needed for the permutation instructions. Similarly, guard extend from vector __bf16 to vector float with !HONOR_NANS (BFmode). gcc/ChangeLog: * config/i386/i386.md (truncsfbf2): Add !flag_rounding_math to the condition, require flag_unsafe_math_optimizations when native instruction is not available. * config/i386/mmx.md: (truncv2sfv2bf2): Ditto. (extendv2bfv2sf2): Add !HONOR_NANS (BFmode) to the condition. * config/i386/sse.md: (truncv4sfv4bf2): Add !flag_rounding_math to the condition, require flag_unsafe_math_optimizations when native instruction is not available. (truncv8sfv8bf2): Ditto. (truncv16sfv16bf2): Ditto. (extendv4bfv4sf2): Add !HONOR_NANS (BFmode) to the condition. (extendv8bfv8sf2): Ditto. (extendv16bfv16sf2): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bf16-truncsfbf.c: Add -ffast-math. * gcc.target/i386/avx512bw-extendbf2sf.c: Ditto. * gcc.target/i386/avx512bw-truncsfbf.c: Ditto. * gcc.target/i386/sse2-extendbf2sf.c: Ditto. * gcc.target/i386/ssse3-truncsfbf.c: Ditto.
1 parent ca1cff0 commit de867e8

File tree

8 files changed

+33
-12
lines changed

8 files changed

+33
-12
lines changed

gcc/config/i386/i386.md

+10-1
Original file line numberDiff line numberDiff line change
@@ -5698,11 +5698,20 @@
56985698
(set_attr "prefix" "evex")
56995699
(set_attr "mode" "HF")])
57005700

5701+
/* vcvtneps2bf16 doesn't honor sNaN and quietly turns sNaN into qNaN,
5702+
and it always rounds to even.
5703+
flag_unsafe_math_optimizations is needed for psrld.
5704+
If we don't expect qNaNs nor sNaNs and can assume rounding
5705+
to nearest, we can expand the conversion inline as
5706+
(fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */
57015707
(define_insn "truncsfbf2"
57025708
[(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv")
57035709
(float_truncate:BF
57045710
(match_operand:SF 1 "register_operand" "0,x,v,Yv")))]
5705-
"TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)"
5711+
"TARGET_SSE2 && !HONOR_NANS (BFmode) && !flag_rounding_math
5712+
&& (flag_unsafe_math_optimizations
5713+
|| TARGET_AVXNECONVERT
5714+
|| (TARGET_AVX512BF16 && TARGET_AVX512VL))"
57065715
"@
57075716
psrld\t{$16, %0|%0, 16}
57085717
%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}

gcc/config/i386/mmx.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -2998,7 +2998,11 @@
29982998
[(set (match_operand:V2BF 0 "register_operand")
29992999
(float_truncate:V2BF
30003000
(match_operand:V2SF 1 "nonimmediate_operand")))]
3001-
"TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
3001+
"TARGET_SSSE3 && TARGET_MMX_WITH_SSE
3002+
&& !HONOR_NANS (BFmode) && !flag_rounding_math
3003+
&& (flag_unsafe_math_optimizations
3004+
|| TARGET_AVXNECONVERT
3005+
|| (TARGET_AVX512BF16 && TARGET_AVX512VL))"
30023006
{
30033007
rtx op1 = gen_reg_rtx (V4SFmode);
30043008
rtx op0 = gen_reg_rtx (V4BFmode);
@@ -3016,7 +3020,7 @@
30163020
[(set (match_operand:V2SF 0 "register_operand")
30173021
(float_extend:V2SF
30183022
(match_operand:V2BF 1 "nonimmediate_operand")))]
3019-
"TARGET_SSE2 && TARGET_MMX_WITH_SSE"
3023+
"TARGET_SSE2 && TARGET_MMX_WITH_SSE && !HONOR_NANS (BFmode)"
30203024
{
30213025
rtx op0 = gen_reg_rtx (V4SFmode);
30223026
rtx op1 = gen_reg_rtx (V4BFmode);

gcc/config/i386/sse.md

+12-4
Original file line numberDiff line numberDiff line change
@@ -30995,7 +30995,10 @@
3099530995
[(set (match_operand:V4BF 0 "register_operand")
3099630996
(float_truncate:V4BF
3099730997
(match_operand:V4SF 1 "nonimmediate_operand")))]
30998-
"TARGET_SSSE3"
30998+
"TARGET_SSSE3 && !HONOR_NANS (BFmode) && !flag_rounding_math
30999+
&& (flag_unsafe_math_optimizations
31000+
|| TARGET_AVXNECONVERT
31001+
|| (TARGET_AVX512BF16 && TARGET_AVX512VL))"
3099931002
{
3100031003
if (!TARGET_AVXNECONVERT
3100131004
&& !(TARGET_AVX512BF16 && TARGET_AVX512VL))
@@ -31088,7 +31091,10 @@
3108831091
[(set (match_operand:V8BF 0 "register_operand")
3108931092
(float_truncate:V8BF
3109031093
(match_operand:V8SF 1 "nonimmediate_operand")))]
31091-
"TARGET_AVX2"
31094+
"TARGET_AVX2 && !HONOR_NANS (BFmode) && !flag_rounding_math
31095+
&& (flag_unsafe_math_optimizations
31096+
|| TARGET_AVXNECONVERT
31097+
|| (TARGET_AVX512BF16 && TARGET_AVX512VL))"
3109231098
{
3109331099
if (!TARGET_AVXNECONVERT
3109431100
&& !(TARGET_AVX512BF16 && TARGET_AVX512VL))
@@ -31114,7 +31120,9 @@
3111431120
[(set (match_operand:V16BF 0 "register_operand")
3111531121
(float_truncate:V16BF
3111631122
(match_operand:V16SF 1 "nonimmediate_operand")))]
31117-
"TARGET_AVX512BW && TARGET_EVEX512"
31123+
"TARGET_AVX512BW && TARGET_EVEX512
31124+
&& !HONOR_NANS (BFmode) && !flag_rounding_math
31125+
&& (flag_unsafe_math_optimizations || TARGET_AVX512BF16)"
3111831126
{
3111931127
if (!TARGET_AVX512BF16)
3112031128
{
@@ -31127,7 +31135,7 @@
3112731135
[(set (match_operand:VF1_AVX512BW 0 "register_operand")
3112831136
(float_extend:VF1_AVX512BW
3112931137
(match_operand:<sf_cvt_bf16> 1 "nonimmediate_operand")))]
31130-
"TARGET_SSE2"
31138+
"TARGET_SSE2 && !HONOR_NANS (BFmode)"
3113131139
{
3113231140
ix86_expand_vector_bf2sf_with_vec_perm (operands[0], operands[1]);
3113331141
DONE;
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* { dg-do compile } */
2-
/* { dg-options "-mavx512vl -mavx512bf16 -O2" } */
2+
/* { dg-options "-mavx512vl -mavx512bf16 -O2 -ffast-math" } */
33
/* { dg-final { scan-assembler-times {(?n)vcvtneps2bf16} 6 } } */
44

55
#include "avx512bw-truncsfbf.c"

gcc/testsuite/gcc.target/i386/avx512bw-extendbf2sf.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* { dg-do compile } */
2-
/* { dg-options "-mavx512bw -mavx512vl -O2" } */
2+
/* { dg-options "-mavx512bw -mavx512vl -O2 -ffast-math" } */
33
/* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|vpunpcklwd)} 6 } } */
44

55
typedef float v4sf __attribute__((vector_size(16)));

gcc/testsuite/gcc.target/i386/avx512bw-truncsfbf.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* { dg-do compile } */
2-
/* { dg-options "-mavx512bw -mavx512vl -mno-avx512bf16 -mno-avxneconvert -O2" } */
2+
/* { dg-options "-mavx512bw -mavx512vl -mno-avx512bf16 -mno-avxneconvert -O2 -ffast-math" } */
33
/* { dg-final { scan-assembler-times {(?n)(?:vpermw|vpshufb)} 6 } } */
44

55
typedef float v4sf __attribute__((vector_size(16)));

gcc/testsuite/gcc.target/i386/sse2-extendbf2sf.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* { dg-do compile } */
2-
/* { dg-options "-msse2 -O2" } */
2+
/* { dg-options "-msse2 -O2 -ffast-math" } */
33
/* { dg-final { scan-assembler-times {(?n)(?:vpermi2w|punpcklwd)} 2 { target { ! ia32 } } } } */
44

55
typedef float v2sf __attribute__((vector_size(8)));

gcc/testsuite/gcc.target/i386/ssse3-truncsfbf.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* { dg-do compile } */
2-
/* { dg-options "-mssse3 -mno-avx512bf16 -mno-avxneconvert -O2" } */
2+
/* { dg-options "-mssse3 -mno-avx512bf16 -mno-avxneconvert -O2 -ffast-math" } */
33
/* { dg-final { scan-assembler-times {(?n)pshufb} 2 { target { ! ia32 } } } } */
44

55
typedef float v2sf __attribute__((vector_size(8)));

0 commit comments

Comments
 (0)