Skip to content

Commit b6be9c7

Browse files
KorovinVlad authored and fda0 committed
Clean dirty cache lines for release fence. (cherry picked from commit 2b94d63)
1 parent 34c10c5 commit b6be9c7

File tree

2 files changed

+25
-9
lines changed

2 files changed

+25
-9
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLoadStoreLowering.cpp

+15-6
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,20 @@ static bool isSPIRVAtomic(const Value *Val) {
290290
}
291291
}
292292

293+
// Selects the LSC fence operation for a given atomic ordering:
//   Acquire                                  -> LSC_FENCE_OP_INVALIDATE
//   Release                                  -> LSC_FENCE_OP_CLEAN
//   AcquireRelease / SequentiallyConsistent  -> LSC_FENCE_OP_EVICT
//   any other ordering                       -> LSC_FENCE_OP_NONE
// Address space is not considered here; in the hunk shown, the caller uses
// this result only for global memory and LSC_FENCE_OP_NONE otherwise.
static LSC_FENCE_OP getLSCFenceOp(AtomicOrdering Ordering) {
294+
switch (Ordering) {
295+
default:
296+
return LSC_FENCE_OP_NONE;
297+
case AtomicOrdering::Acquire:
298+
return LSC_FENCE_OP_INVALIDATE;
299+
case AtomicOrdering::Release:
300+
return LSC_FENCE_OP_CLEAN;
301+
case AtomicOrdering::AcquireRelease:
302+
case AtomicOrdering::SequentiallyConsistent:
303+
return LSC_FENCE_OP_EVICT;
304+
}
305+
}
306+
293307
void GenXLoadStoreLowering::getAnalysisUsage(AnalysisUsage &AU) const {
294308
AU.addRequired<TargetPassConfig>();
295309
AU.addRequired<GenXBackendConfig>();
@@ -1110,13 +1124,8 @@ GenXLoadStoreLowering::createLSCStandAloneFence(FenceInst &I) const {
11101124
return nullptr;
11111125

11121126
bool IsGlobal = AS == vc::AddrSpace::Global;
1113-
bool IsInvalidateL1 = Ordering == AtomicOrdering::SequentiallyConsistent ||
1114-
Ordering == AtomicOrdering::AcquireRelease ||
1115-
Ordering == AtomicOrdering::Acquire;
1116-
IsInvalidateL1 &= IsGlobal;
1117-
1127+
auto FenceOp = IsGlobal ? getLSCFenceOp(Ordering) : LSC_FENCE_OP_NONE;
11181128
auto SubFuncID = IsGlobal ? LSC_UGM : LSC_SLM;
1119-
auto FenceOp = IsInvalidateL1 ? LSC_FENCE_OP_INVALIDATE : LSC_FENCE_OP_NONE;
11201129
auto Scope = getLSCFenceScope(&I);
11211130

11221131
auto *M = I.getModule();

IGC/VectorCompiler/test/LoadStoreLowering/fence.ll

+10-3
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,28 @@ define void @fence_acq() {
2121

2222
define void @fence_rel() {
2323
; CHECK: call void @llvm.genx.fence(i8 1)
24-
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 0, i8 2)
24+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 4, i8 2)
2525
fence syncscope("device") release
2626
ret void
2727
}
2828

2929
define void @fence_acq_rel() {
30+
; CHECK: call void @llvm.genx.fence(i8 65)
31+
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 0, i8 1, i8 2)
32+
fence syncscope("device") acq_rel
33+
ret void
34+
}
35+
36+
define void @fence_acq_rel_wg() {
3037
; CHECK: call void @llvm.genx.fence(i8 33)
3138
; CHECK-LSC: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
3239
fence syncscope("workgroup") acq_rel
3340
ret void
3441
}
3542

3643
define void @fence_seq_cst() {
37-
; CHECK-NOT: call void @llvm.genx.fence(i8 33)
38-
; CHECK-LSC-NOT: call void @llvm.genx.lsc.fence.i1(i1 true, i8 3, i8 0, i8 0)
44+
; CHECK-NOT: call void @llvm.genx.fence
45+
; CHECK-LSC-NOT: call void @llvm.genx.lsc.fence.i1
3946
fence syncscope("subgroup") seq_cst
4047
ret void
4148
}

0 commit comments

Comments
 (0)