Skip to content

Commit 26a1d66

Browse files
phoebewang authored and tstellar committed
[X86] Add missing subvector_subreg_lowering for BF16 (#83720)
1 parent 0bf7ff1 commit 26a1d66

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

llvm/lib/Target/X86/X86InstrVecCompiler.td

+3
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
8383
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
8484
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
8585
defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>;
86+
defm : subvector_subreg_lowering<VR128, v8bf16, VR256, v16bf16, sub_xmm>;
8687

8788
// A 128-bit subvector extract from the first 512-bit vector position is a
8889
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -95,6 +96,7 @@ defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
9596
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
9697
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
9798
defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>;
99+
defm : subvector_subreg_lowering<VR128, v8bf16, VR512, v32bf16, sub_xmm>;
98100

99101
// A 256-bit subvector extract from the first 512-bit vector position is a
100102
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
@@ -107,6 +109,7 @@ defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
107109
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
108110
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
109111
defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>;
112+
defm : subvector_subreg_lowering<VR256, v16bf16, VR512, v32bf16, sub_ymm>;
110113

111114

112115
// If we're inserting into an all zeros vector, just use a plain move which

llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll

+22
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,25 @@ entry:
381381
%1 = shufflevector <8 x bfloat> %0, <8 x bfloat> undef, <16 x i32> zeroinitializer
382382
ret <16 x bfloat> %1
383383
}
384+
385+
define <16 x i32> @pr83358() {
386+
; X86-LABEL: pr83358:
387+
; X86: # %bb.0:
388+
; X86-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A]
389+
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
390+
; X86-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
391+
; X86-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1]
392+
; X86-NEXT: retl # encoding: [0xc3]
393+
;
394+
; X64-LABEL: pr83358:
395+
; X64: # %bb.0:
396+
; X64-NEXT: vcvtneps2bf16y {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0x05,A,A,A,A]
397+
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
398+
; X64-NEXT: vshufi64x2 $0, %zmm0, %zmm0, %zmm0 # encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc0,0x00]
399+
; X64-NEXT: # zmm0 = zmm0[0,1,0,1,0,1,0,1]
400+
; X64-NEXT: retq # encoding: [0xc3]
401+
%1 = call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>)
402+
%2 = bitcast <8 x bfloat> %1 to <4 x i32>
403+
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
404+
ret <16 x i32> %3
405+
}

llvm/test/CodeGen/X86/bfloat.ll

-1
Original file line numberDiff line numberDiff line change
@@ -2423,7 +2423,6 @@ define <16 x bfloat> @fptrunc_v16f32(<16 x float> %a) nounwind {
24232423
; AVXNC-LABEL: fptrunc_v16f32:
24242424
; AVXNC: # %bb.0:
24252425
; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0
2426-
; AVXNC-NEXT: vinsertf128 $0, %xmm0, %ymm0, %ymm0
24272426
; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm1, %xmm1
24282427
; AVXNC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
24292428
; AVXNC-NEXT: retq

0 commit comments

Comments
 (0)