Skip to content

Commit 85661c2

Browse files
vmustyapszymich
authored and committed
Adjust BFN matching heuristics for pattern matching
VC shouldn't match non-profitable patterns. A flag register cannot be a source for a BFN instruction, so the finalizer emits extra MOV instructions in such cases. It's therefore better to avoid matching them when the total number of instructions is not reduced. We also need to reduce the use threshold for instructions folded into the BFN. This will shorten the value live ranges and reduce register pressure. (cherry picked from commit c416341)
1 parent 051633b commit 85661c2

File tree

2 files changed

+51
-23
lines changed

2 files changed

+51
-23
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

+30-5
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,11 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
244244
Changed |= reassociateIntegerMad(&F);
245245
}
246246

247+
visit(F);
248+
247249
if (EnableBfnMatcher && ST->hasAdd3Bfn())
248250
matchBFN(F);
249251

250-
visit(F);
251-
252252
if (Kind == PatternMatchKind::PreLegalization) {
253253
Changed |= placeConstants(&F);
254254
Changed |= vectorizeConstants(&F);
@@ -398,7 +398,7 @@ class BfnMatcher {
398398
static constexpr StringRef OpNames[] = {"not", "and", "or", "xor"};
399399
static constexpr unsigned LutValues[] = {0xaa, 0xcc, 0xf0};
400400

401-
static constexpr unsigned UsesThreshold = 4;
401+
static constexpr unsigned UsesThreshold = 2;
402402
static constexpr unsigned SourceLimit = 3;
403403

404404
public:
@@ -413,7 +413,7 @@ class BfnMatcher {
413413
if (!Ty->isIntOrIntVectorTy(16) && !Ty->isIntOrIntVectorTy(32))
414414
return false;
415415

416-
unsigned MatchedOps = 0;
416+
MatchedOps = 0;
417417
Srcs.insert(MainInst);
418418

419419
// Grow the pattern to find the source operands using a BFS.
@@ -424,7 +424,7 @@ class BfnMatcher {
424424
auto *Inst = Queue.front();
425425
Queue.pop();
426426

427-
if (Inst->hasNUsesOrMore(UsesThreshold))
427+
if (MatchedOps > 0 && Inst->hasNUsesOrMore(UsesThreshold))
428428
return false;
429429

430430
auto Op = getOperation(Inst);
@@ -538,6 +538,11 @@ class BfnMatcher {
538538
SrcsOrdered[2] = NegCSrc2;
539539
}
540540

541+
if (!isProfitable()) {
542+
LLVM_DEBUG(dbgs() << "BFN: Not profitable\n");
543+
return false;
544+
}
545+
541546
IRBuilder<> Builder(MainInst);
542547

543548
auto Lut = getLutValue(MainInst);
@@ -645,10 +650,30 @@ class BfnMatcher {
645650
return false;
646651
}
647652

653+
static bool isFlagInput(Value *V) {
654+
auto *Cast = dyn_cast<BitCastInst>(V);
655+
if (!Cast)
656+
return false;
657+
658+
auto *Src = Cast->getOperand(0);
659+
auto *SrcTy = Src->getType();
660+
return SrcTy->isIntOrIntVectorTy(1);
661+
}
662+
663+
bool isProfitable() const {
664+
unsigned NumOfFlagInputs = llvm::count_if(SrcsOrdered, isFlagInput);
665+
if (NumOfFlagInputs >= MatchedOps)
666+
return false;
667+
668+
return true;
669+
}
670+
648671
BinaryOperator *MainInst;
649672
const bool TryGreedy;
650673
SmallSetVector<Value *, 4> Srcs;
651674
SmallVector<Value *, 4> SrcsOrdered;
675+
676+
unsigned MatchedOps = 0;
652677
};
653678

654679
// Class to identify cases where a comparison and select are equivalent to a

IGC/VectorCompiler/test/PatternMatch/bfn_match.ll

+21-18
Original file line numberDiff line numberDiff line change
@@ -108,24 +108,27 @@ define i32 @test_match_i32_combine_by_mask_const(i32 %a, i32 %b) {
108108
ret i32 %3
109109
}
110110

111-
define i32 @test_match_i32_const_zero(i32 %a, i32 %b, i32 %mask) {
112-
; CHECK: ret i32 0
113-
%nmask = xor i32 -1, %mask
114-
%1 = and i32 %a, %mask
115-
%2 = and i32 %b, %nmask
116-
%3 = or i32 %1, %2
117-
%4 = xor i32 %3, -1
118-
%5 = and i32 %3, %4
119-
ret i32 %5
111+
declare void @use(i32)
112+
113+
; CHECK-LABEL: @test_unmatch_i32(
114+
define i32 @test_unmatch_i32(i32 %op0, i32 %op1, i32 %op2) {
115+
; CHECK-NOT: call void @llvm.genx.bfn.i32.i32(
116+
%1 = and i32 %op0, %op1
117+
%2 = and i32 %1, %op2
118+
call void @use(i32 %1)
119+
120+
ret i32 %2
120121
}
121122

122-
define i32 @test_match_i32_const_one(i32 %a, i32 %b, i32 %mask) {
123-
; CHECK: ret i32 -1
124-
%nmask = xor i32 -1, %mask
125-
%1 = and i32 %a, %mask
126-
%2 = and i32 %b, %nmask
127-
%3 = or i32 %1, %2
128-
%4 = xor i32 %3, -1
129-
%5 = or i32 %3, %4
130-
ret i32 %5
123+
; CHECK-LABEL: @test_unmatch_flag
124+
define i32 @test_unmatch_flag(<32 x i1> %a, <32 x i1> %b, <32 x i1> %c) {
125+
%as = bitcast <32 x i1> %a to i32
126+
%bs = bitcast <32 x i1> %b to i32
127+
%cs = bitcast <32 x i1> %c to i32
128+
129+
; CHECK-NOT: call i32 @llvm.genx.bfn.i32.i32(
130+
%1 = and i32 %as, %bs
131+
%2 = and i32 %bs, %cs
132+
133+
ret i32 %2
131134
}

0 commit comments

Comments
 (0)