Skip to content

Commit 0364dd2

Browse files
authored
[CIR] Add support for nontemporal loads and stores (#1494)
This PR adds a new boolean flag to the `cir.load` and `cir.store` operations that distinguishes nontemporal loads and stores. It also adds support for the `__builtin_nontemporal_load` and `__builtin_nontemporal_store` builtin functions.
1 parent 72b68b7 commit 0364dd2

File tree

12 files changed

+90
-32
lines changed

12 files changed

+90
-32
lines changed

clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

+8-4
Original file line numberDiff line numberDiff line change
@@ -160,13 +160,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
160160
}
161161

162162
cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
163-
bool isVolatile = false, uint64_t alignment = 0) {
163+
bool isVolatile = false, bool isNontemporal = false,
164+
uint64_t alignment = 0) {
164165
mlir::IntegerAttr intAttr;
165166
if (alignment)
166167
intAttr = mlir::IntegerAttr::get(
167168
mlir::IntegerType::get(ptr.getContext(), 64), alignment);
168169

169170
return create<cir::LoadOp>(loc, ptr, /*isDeref=*/false, isVolatile,
171+
isNontemporal,
170172
/*alignment=*/intAttr,
171173
/*mem_order=*/
172174
cir::MemOrderAttr{},
@@ -175,7 +177,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
175177

176178
mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr,
177179
uint64_t alignment) {
178-
return createLoad(loc, ptr, /*isVolatile=*/false, alignment);
180+
return createLoad(loc, ptr, /*isVolatile=*/false, /*isNontemporal=*/false,
181+
alignment);
179182
}
180183

181184
mlir::Value createNot(mlir::Value value) {
@@ -350,13 +353,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
350353
}
351354

352355
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst,
353-
bool _volatile = false,
356+
bool isVolatile = false, bool isNontemporal = false,
354357
::mlir::IntegerAttr align = {},
355358
cir::MemOrderAttr order = {}) {
356359
if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() !=
357360
val.getType())
358361
dst = createPtrBitcast(dst, val.getType());
359-
return create<cir::StoreOp>(loc, val, dst, _volatile, align, order,
362+
return create<cir::StoreOp>(loc, val, dst, isVolatile, isNontemporal, align,
363+
order,
360364
/*tbaa=*/cir::TBAAAttr{});
361365
}
362366

clang/include/clang/CIR/Dialect/IR/CIROps.td

+4
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,7 @@ def LoadOp : CIR_Op<"load", [
587587
let arguments = (ins Arg<CIR_PointerType, "the address to load from",
588588
[MemRead]>:$addr, UnitAttr:$isDeref,
589589
UnitAttr:$is_volatile,
590+
UnitAttr:$is_nontemporal,
590591
OptionalAttr<I64Attr>:$alignment,
591592
OptionalAttr<MemOrder>:$mem_order,
592593
OptionalAttr<CIR_AnyTBAAAttr>:$tbaa
@@ -596,6 +597,7 @@ def LoadOp : CIR_Op<"load", [
596597
let assemblyFormat = [{
597598
(`deref` $isDeref^)?
598599
(`volatile` $is_volatile^)?
600+
(`nontemporal` $is_nontemporal^)?
599601
(`align` `(` $alignment^ `)`)?
600602
(`atomic` `(` $mem_order^ `)`)?
601603
$addr `:` qualified(type($addr)) `,` type($result) attr-dict
@@ -656,12 +658,14 @@ def StoreOp : CIR_Op<"store", [
656658
Arg<CIR_PointerType, "the address to store the value",
657659
[MemWrite]>:$addr,
658660
UnitAttr:$is_volatile,
661+
UnitAttr:$is_nontemporal,
659662
OptionalAttr<I64Attr>:$alignment,
660663
OptionalAttr<MemOrder>:$mem_order,
661664
OptionalAttr<CIR_AnyTBAAAttr>:$tbaa);
662665

663666
let assemblyFormat = [{
664667
(`volatile` $is_volatile^)?
668+
(`nontemporal` $is_nontemporal^)?
665669
(`align` `(` $alignment^ `)`)?
666670
(`atomic` `(` $mem_order^ `)`)?
667671
$value `,` $addr attr-dict `:` type($value) `,` qualified(type($addr))

clang/lib/CIR/CodeGen/CIRGenAtomic.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@ static void emitAtomicOp(CIRGenFunction &CGF, AtomicExpr *E, Address Dest,
619619
// FIXME(cir): add scope information.
620620
assert(!cir::MissingFeatures::syncScopeID());
621621
builder.createStore(loc, loadVal1, Ptr, E->isVolatile(),
622+
/*isNontemporal=*/false,
622623
/*alignment=*/mlir::IntegerAttr{}, orderAttr);
623624
return;
624625
}

clang/lib/CIR/CodeGen/CIRGenBuilder.h

+15-11
Original file line numberDiff line numberDiff line change
@@ -859,32 +859,35 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
859859
}
860860

861861
cir::LoadOp createLoad(mlir::Location loc, Address addr,
862-
bool isVolatile = false) {
862+
bool isVolatile = false, bool isNontemporal = false) {
863863
auto ptrTy = mlir::dyn_cast<cir::PointerType>(addr.getPointer().getType());
864864
if (addr.getElementType() != ptrTy.getPointee())
865865
addr = addr.withPointer(
866866
createPtrBitcast(addr.getPointer(), addr.getElementType()));
867867

868868
return create<cir::LoadOp>(
869869
loc, addr.getElementType(), addr.getPointer(), /*isDeref=*/false,
870-
/*is_volatile=*/isVolatile, /*alignment=*/mlir::IntegerAttr{},
870+
/*is_volatile=*/isVolatile, /*is_nontemporal=*/isNontemporal,
871+
/*alignment=*/mlir::IntegerAttr{},
871872
/*mem_order=*/cir::MemOrderAttr{}, /*tbaa=*/cir::TBAAAttr{});
872873
}
873874

874875
cir::LoadOp createAlignedLoad(mlir::Location loc, mlir::Type ty,
875876
mlir::Value ptr, llvm::MaybeAlign align,
876-
bool isVolatile) {
877+
bool isVolatile, bool isNontemporal) {
877878
if (ty != mlir::cast<cir::PointerType>(ptr.getType()).getPointee())
878879
ptr = createPtrBitcast(ptr, ty);
879880
uint64_t alignment = align ? align->value() : 0;
880-
return CIRBaseBuilderTy::createLoad(loc, ptr, isVolatile, alignment);
881+
return CIRBaseBuilderTy::createLoad(loc, ptr, isVolatile, isNontemporal,
882+
alignment);
881883
}
882884

883885
cir::LoadOp createAlignedLoad(mlir::Location loc, mlir::Type ty,
884886
mlir::Value ptr, llvm::MaybeAlign align) {
885887
// TODO: make sure callsites shouldn't be really passing volatile.
886888
assert(!cir::MissingFeatures::volatileLoadOrStore());
887-
return createAlignedLoad(loc, ty, ptr, align, /*isVolatile=*/false);
889+
return createAlignedLoad(loc, ty, ptr, align, /*isVolatile=*/false,
890+
/*isNontemporal=*/false);
888891
}
889892

890893
cir::LoadOp
@@ -894,11 +897,11 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
894897
}
895898

896899
cir::StoreOp createStore(mlir::Location loc, mlir::Value val, Address dst,
897-
bool _volatile = false,
900+
bool isVolatile = false, bool isNontemporal = false,
898901
::mlir::IntegerAttr align = {},
899902
cir::MemOrderAttr order = {}) {
900-
return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), _volatile,
901-
align, order);
903+
return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), isVolatile,
904+
isNontemporal, align, order);
902905
}
903906

904907
cir::StoreOp createFlagStore(mlir::Location loc, bool val, mlir::Value dst) {
@@ -937,16 +940,17 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
937940
cir::StoreOp
938941
createAlignedStore(mlir::Location loc, mlir::Value val, mlir::Value dst,
939942
clang::CharUnits align = clang::CharUnits::One(),
940-
bool _volatile = false, cir::MemOrderAttr order = {}) {
943+
bool isVolatile = false, bool isNontemporal = false,
944+
cir::MemOrderAttr order = {}) {
941945
llvm::MaybeAlign mayAlign = align.getAsAlign();
942946
mlir::IntegerAttr alignAttr;
943947
if (mayAlign) {
944948
uint64_t alignment = mayAlign ? mayAlign->value() : 0;
945949
alignAttr = mlir::IntegerAttr::get(
946950
mlir::IntegerType::get(dst.getContext(), 64), alignment);
947951
}
948-
return CIRBaseBuilderTy::createStore(loc, val, dst, _volatile, alignAttr,
949-
order);
952+
return CIRBaseBuilderTy::createStore(loc, val, dst, isVolatile,
953+
isNontemporal, alignAttr, order);
950954
}
951955

952956
// Convert byte offset to sequence of high-level indices suitable for

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

+23-2
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,24 @@ static mlir::Value makeBinaryAtomicValue(
306306
return emitFromInt(cgf, rmwi->getResult(0), typ, valueType);
307307
}
308308

309+
static void emitNontemporalStore(CIRGenFunction &cgf, const CallExpr *expr) {
310+
mlir::Value val = cgf.emitScalarExpr(expr->getArg(0));
311+
Address addr = cgf.emitPointerWithAlignment(expr->getArg(1));
312+
313+
val = cgf.emitToMemory(val, expr->getArg(0)->getType());
314+
LValue lv = cgf.makeAddrLValue(addr, expr->getArg(0)->getType());
315+
lv.setNontemporal(true);
316+
cgf.emitStoreOfScalar(val, lv, false);
317+
}
318+
319+
static mlir::Value emitNontemporalLoad(CIRGenFunction &cgf,
320+
const CallExpr *expr) {
321+
Address addr = cgf.emitPointerWithAlignment(expr->getArg(0));
322+
LValue lv = cgf.makeAddrLValue(addr, expr->getType());
323+
lv.setNontemporal(true);
324+
return cgf.emitLoadOfScalar(lv, expr->getExprLoc());
325+
}
326+
309327
static RValue emitBinaryAtomic(CIRGenFunction &CGF, cir::AtomicFetchKind kind,
310328
const CallExpr *E) {
311329
return RValue::get(makeBinaryAtomicValue(CGF, kind, E));
@@ -1907,10 +1925,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
19071925

19081926
case Builtin::BI__sync_synchronize:
19091927
llvm_unreachable("BI__sync_synchronize NYI");
1928+
19101929
case Builtin::BI__builtin_nontemporal_load:
1911-
llvm_unreachable("BI__builtin_nontemporal_load NYI");
1930+
return RValue::get(emitNontemporalLoad(*this, E));
19121931
case Builtin::BI__builtin_nontemporal_store:
1913-
llvm_unreachable("BI__builtin_nontemporal_store NYI");
1932+
emitNontemporalStore(*this, E);
1933+
return RValue::get(nullptr);
1934+
19141935
case Builtin::BI__c11_atomic_is_lock_free:
19151936
llvm_unreachable("BI__c11_atomic_is_lock_free NYI");
19161937
case Builtin::BI__atomic_is_lock_free:

clang/lib/CIR/CodeGen/CIRGenExpr.cpp

+4-9
Original file line numberDiff line numberDiff line change
@@ -662,11 +662,8 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
662662
}
663663

664664
assert(currSrcLoc && "must pass in source location");
665-
auto storeOp = builder.createStore(*currSrcLoc, value, addr, isVolatile);
666-
667-
if (isNontemporal) {
668-
llvm_unreachable("NYI");
669-
}
665+
auto storeOp =
666+
builder.createStore(*currSrcLoc, value, addr, isVolatile, isNontemporal);
670667

671668
CGM.decorateOperationWithTBAA(storeOp, tbaaInfo);
672669
}
@@ -2962,11 +2959,9 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
29622959
Ptr = builder.create<cir::CastOp>(loc, ElemPtrTy, cir::CastKind::bitcast,
29632960
Ptr);
29642961
}
2965-
auto loadOp = builder.CIRBaseBuilderTy::createLoad(loc, Ptr, isVolatile);
2962+
auto loadOp =
2963+
builder.CIRBaseBuilderTy::createLoad(loc, Ptr, isVolatile, isNontemporal);
29662964

2967-
if (isNontemporal) {
2968-
llvm_unreachable("NYI");
2969-
}
29702965
CGM.decorateOperationWithTBAA(loadOp, tbaaInfo);
29712966

29722967
assert(!cir::MissingFeatures::emitScalarRangeCheck() && "NYI");

clang/lib/CIR/CodeGen/CIRGenFunction.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -2041,7 +2041,7 @@ class CIRGenFunction : public CIRGenTypeCache {
20412041
builder.restoreInsertionPoint(OutermostConditional->getInsertPoint());
20422042
builder.createStore(
20432043
value.getLoc(), value, addr,
2044-
/*volatile*/ false,
2044+
/*isVolatile=*/false, /*isNontemporal=*/false,
20452045
mlir::IntegerAttr::get(
20462046
mlir::IntegerType::get(value.getContext(), 64),
20472047
(uint64_t)addr.getAlignment().getAsAlign().value()));

clang/lib/CIR/CodeGen/CIRGenValue.h

+1
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ class LValue {
235235
void setNonGC(bool Value) { NonGC = Value; }
236236

237237
bool isNontemporal() const { return Nontemporal; }
238+
void setNontemporal(bool value) { Nontemporal = value; }
238239

239240
bool isObjCWeak() const {
240241
return Quals.getObjCGCAttr() == clang::Qualifiers::Weak;

clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -149,9 +149,10 @@ DeletionKind cir::CopyOp::removeBlockingUses(
149149
OpBuilder &builder, Value reachingDefinition,
150150
const DataLayout &dataLayout) {
151151
if (loadsFrom(slot))
152-
builder.create<cir::StoreOp>(getLoc(), reachingDefinition, getDst(), false,
153-
mlir::IntegerAttr{}, cir::MemOrderAttr(),
154-
cir::TBAAAttr{});
152+
builder.create<cir::StoreOp>(getLoc(), reachingDefinition, getDst(),
153+
/*is_volatile=*/false,
154+
/*is_nontemporal=*/false, mlir::IntegerAttr{},
155+
cir::MemOrderAttr(), cir::TBAAAttr{});
155156
return DeletionKind::Delete;
156157
}
157158

clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ void ItaniumCXXABI::lowerGetMethod(
384384
op.getLoc(), vtablePtrPtrTy, cir::CastKind::bitcast, loweredObjectPtr);
385385
mlir::Value vtablePtr = rewriter.create<cir::LoadOp>(
386386
op.getLoc(), vtablePtrPtr, /*isDeref=*/false, /*isVolatile=*/false,
387+
/*isNontemporal=*/false,
387388
/*alignment=*/mlir::IntegerAttr(), /*mem_order=*/cir::MemOrderAttr(),
388389
/*tbaa=*/mlir::ArrayAttr());
389390

@@ -418,6 +419,7 @@ void ItaniumCXXABI::lowerGetMethod(
418419
op.getLoc(), vfpPtrTy, cir::CastKind::bitcast, vfpAddr);
419420
funcPtr = rewriter.create<cir::LoadOp>(
420421
op.getLoc(), vfpPtr, /*isDeref=*/false, /*isVolatile=*/false,
422+
/*isNontemporal=*/false,
421423
/*alignment=*/mlir::IntegerAttr(),
422424
/*mem_order=*/cir::MemOrderAttr(),
423425
/*tbaa=*/mlir::ArrayAttr());

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1680,7 +1680,7 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
16801680
// TODO: nontemporal, syncscope.
16811681
auto newLoad = rewriter.create<mlir::LLVM::LoadOp>(
16821682
op->getLoc(), llvmTy, adaptor.getAddr(), /* alignment */ alignment,
1683-
op.getIsVolatile(), /* nontemporal */ false,
1683+
op.getIsVolatile(), /* nontemporal */ op.getIsNontemporal(),
16841684
/* invariant */ false, /* invariantGroup */ invariant, ordering);
16851685

16861686
// Convert adapted result to its original type if needed.
@@ -1722,7 +1722,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
17221722
// TODO: nontemporal, syncscope.
17231723
auto storeOp = rewriter.create<mlir::LLVM::StoreOp>(
17241724
op->getLoc(), value, adaptor.getAddr(), alignment, op.getIsVolatile(),
1725-
/* nontemporal */ false, /* invariantGroup */ invariant, ordering);
1725+
/* nontemporal */ op.getIsNontemporal(), /* invariantGroup */ invariant,
1726+
ordering);
17261727
rewriter.replaceOp(op, storeOp);
17271728
if (auto tbaa = op.getTbaaAttr()) {
17281729
storeOp.setTBAATags(lowerCIRTBAAAttr(tbaa, rewriter, lowerMod));
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
4+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
5+
6+
int nontemporal_load(const int *ptr) {
7+
return __builtin_nontemporal_load(ptr);
8+
}
9+
10+
// CIR-LABEL: @_Z16nontemporal_loadPKi
11+
// CIR: %{{.+}} = cir.load nontemporal %{{.+}} : !cir.ptr<!s32i>, !s32i
12+
13+
// LLVM-LABEL: @_Z16nontemporal_loadPKi
14+
// LLVM: %{{.+}} = load i32, ptr %{{.+}}, align 4, !nontemporal !1
15+
16+
void nontemporal_store(int *ptr, int value) {
17+
__builtin_nontemporal_store(value, ptr);
18+
}
19+
20+
// CIR-LABEL: @_Z17nontemporal_storePii
21+
// CIR: cir.store nontemporal %{{.+}}, %{{.+}} : !s32i, !cir.ptr<!s32i>
22+
23+
// LLVM-LABEL: @_Z17nontemporal_storePii
24+
// LLVM: store i32 %{{.+}}, ptr %{{.+}}, align 4, !nontemporal !1

0 commit comments

Comments
 (0)