Skip to content

Commit 2fe7e81

Browse files
esukhov authored and pszymich committed
Fixed bug in vectorizer
Wider extract elements are treated correctly now. (cherry picked from commit ae00cfb)
1 parent e4ddfe4 commit 2fe7e81

File tree

2 files changed

+98
-1
lines changed

2 files changed

+98
-1
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

+6 -1
Original file line number | Diff line number | Diff line change
@@ -169,7 +169,7 @@ unsigned int getConstantValueAsInt(Value *I) {
169169
return Result;
170170
}
171171

172-
unsigned int getVectorSize(Instruction *I) {
172+
unsigned int getVectorSize(Value *I) {
173173
IGCLLVM::FixedVectorType *VecType =
174174
llvm::dyn_cast<IGCLLVM::FixedVectorType>(I->getType());
175175
if (!VecType)
@@ -620,6 +620,11 @@ bool IGCVectorizer::checkInsertElement(Instruction *First, VecArr &Slice) {
620620
bool IGCVectorizer::checkExtractElement(Instruction *Compare, VecArr &Slice) {
621621
Value *CompareSource = Slice[0]->getOperand(0);
622622

623+
if (getVectorSize(CompareSource) != Slice.size()) {
624+
PRINT_LOG_NL("Extract is wider than the slice, need additional handling, not implemented");
625+
return false;
626+
}
627+
623628
if (!llvm::isa<Instruction>(CompareSource)) {
624629
PRINT_LOG_NL("Source is not an instruction");
625630
return false;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,92 @@
1+
;=========================== begin_copyright_notice ============================
2+
;
3+
; Copyright (C) 2024 Intel Corporation
4+
;
5+
; SPDX-License-Identifier: MIT
6+
;
7+
;============================ end_copyright_notice =============================
8+
9+
; UNSUPPORTED: system-windows
10+
; RUN: igc_opt --igc-vectorizer -S -dce < %s 2>&1 | FileCheck %s
11+
12+
define spir_kernel void @wibble(<64 x float> %arg) {
13+
bb:
14+
br label %bb2
15+
16+
bb1: ; No predecessors!
17+
br label %bb2
18+
19+
bb2: ; preds = %bb1, %bb
20+
%tmp = phi <64 x float> [ zeroinitializer, %bb ], [ zeroinitializer, %bb1 ]
21+
%tmp3 = extractelement <64 x float> %arg, i64 0
22+
%tmp4 = extractelement <64 x float> %arg, i64 1
23+
%tmp5 = extractelement <64 x float> %arg, i64 2
24+
%tmp6 = extractelement <64 x float> %arg, i64 3
25+
%tmp7 = extractelement <64 x float> %arg, i64 4
26+
%tmp8 = extractelement <64 x float> %arg, i64 5
27+
%tmp9 = extractelement <64 x float> %arg, i64 6
28+
%tmp10 = extractelement <64 x float> %arg, i64 7
29+
%tmp11 = extractelement <64 x float> %arg, i64 8
30+
%tmp12 = extractelement <64 x float> %arg, i64 9
31+
%tmp13 = extractelement <64 x float> %arg, i64 10
32+
%tmp14 = extractelement <64 x float> %arg, i64 11
33+
%tmp15 = extractelement <64 x float> %arg, i64 12
34+
%tmp16 = extractelement <64 x float> %arg, i64 13
35+
%tmp17 = extractelement <64 x float> %arg, i64 14
36+
%tmp18 = extractelement <64 x float> %arg, i64 15
37+
br label %bb19
38+
39+
bb19: ; preds = %bb36, %bb2
40+
; CHECK-LABEL: bb19:
41+
; CHECK-NOT: phi <8 x float>{{.*}}[ %arg, %bb2 ]
42+
; CHECK: ret void
43+
%tmp20 = phi float [ %tmp18, %bb2 ], [ 0.000000e+00, %bb36 ]
44+
%tmp21 = phi float [ %tmp17, %bb2 ], [ 0.000000e+00, %bb36 ]
45+
%tmp22 = phi float [ %tmp16, %bb2 ], [ 0.000000e+00, %bb36 ]
46+
%tmp23 = phi float [ %tmp15, %bb2 ], [ 0.000000e+00, %bb36 ]
47+
%tmp24 = phi float [ %tmp14, %bb2 ], [ 0.000000e+00, %bb36 ]
48+
%tmp25 = phi float [ %tmp13, %bb2 ], [ 0.000000e+00, %bb36 ]
49+
%tmp26 = phi float [ %tmp12, %bb2 ], [ 0.000000e+00, %bb36 ]
50+
%tmp27 = phi float [ %tmp11, %bb2 ], [ 0.000000e+00, %bb36 ]
51+
%tmp28 = phi float [ %tmp10, %bb2 ], [ 0.000000e+00, %bb36 ]
52+
%tmp29 = phi float [ %tmp9, %bb2 ], [ 0.000000e+00, %bb36 ]
53+
%tmp30 = phi float [ %tmp8, %bb2 ], [ 0.000000e+00, %bb36 ]
54+
%tmp31 = phi float [ %tmp7, %bb2 ], [ 0.000000e+00, %bb36 ]
55+
%tmp32 = phi float [ %tmp6, %bb2 ], [ 0.000000e+00, %bb36 ]
56+
%tmp33 = phi float [ %tmp5, %bb2 ], [ 0.000000e+00, %bb36 ]
57+
%tmp34 = phi float [ %tmp4, %bb2 ], [ 0.000000e+00, %bb36 ]
58+
%tmp35 = phi float [ %tmp3, %bb2 ], [ 0.000000e+00, %bb36 ]
59+
ret void
60+
61+
bb36: ; No predecessors!
62+
%tmp37 = insertelement <8 x float> zeroinitializer, float %tmp35, i64 0
63+
%tmp38 = insertelement <8 x float> %tmp37, float %tmp34, i64 0
64+
%tmp39 = insertelement <8 x float> %tmp38, float %tmp33, i64 0
65+
%tmp40 = insertelement <8 x float> %tmp39, float %tmp32, i64 0
66+
%tmp41 = insertelement <8 x float> %tmp40, float %tmp31, i64 0
67+
%tmp42 = insertelement <8 x float> %tmp41, float %tmp30, i64 0
68+
%tmp43 = insertelement <8 x float> %tmp42, float %tmp29, i64 0
69+
%tmp44 = insertelement <8 x float> %tmp43, float %tmp28, i64 0
70+
%tmp45 = insertelement <8 x float> zeroinitializer, float %tmp27, i64 0
71+
%tmp46 = insertelement <8 x float> %tmp45, float %tmp26, i64 0
72+
%tmp47 = insertelement <8 x float> %tmp46, float %tmp25, i64 0
73+
%tmp48 = insertelement <8 x float> %tmp47, float %tmp24, i64 0
74+
%tmp49 = insertelement <8 x float> %tmp48, float %tmp23, i64 0
75+
%tmp50 = insertelement <8 x float> %tmp49, float %tmp22, i64 0
76+
%tmp51 = insertelement <8 x float> %tmp50, float %tmp21, i64 0
77+
%tmp52 = insertelement <8 x float> %tmp51, float %tmp20, i64 0
78+
%tmp53 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %tmp44, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
79+
%tmp54 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %tmp52, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
80+
br label %bb19
81+
}
82+
83+
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
84+
declare void @llvm.assume(i1 noundef) #0
85+
86+
declare i16 @llvm.genx.GenISA.simdLaneId()
87+
88+
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
89+
90+
attributes #0 = { inaccessiblememonly nofree nosync nounwind willreturn }
91+
92+
!igc.functions = !{}

0 commit comments

Comments
 (0)