Skip to content

Commit 0c723b0

Browse files
committed
[CIR] Lower nested local constant alloca
This patch adds support for lowering local constants in nested scopes, including those in nested loops. For those constant allocas in non-loop inner scopes, this patch keeps their constant flags during alloca hoisting. LLVM lowering would correctly emit necessary invariant metadata for those allocas. For those constant allocas in a loop, this patch introduces a new operation `cir.invariant_group` that marks the beginning of the lifetime of the constant objects. This operation is put at the location of the alloca operation before hoisting them. This patch updates LLVM lowering to emit the necessary invariant metadata when loading and storing through such pointers. This patch takes care of the special case where the constant alloca represents a variable declared in the condition part of a while loop. In such a case, this patch removes the constant flag on the alloca operation when hoisting it.
1 parent f8821e8 commit 0c723b0

File tree

5 files changed

+341
-23
lines changed

5 files changed

+341
-23
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

+57
Original file line numberDiff line numberDiff line change
@@ -3562,6 +3562,63 @@ def LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {
35623562

35633563
}
35643564

3565+
//===----------------------------------------------------------------------===//
3566+
// InvariantGroupOp
3567+
//===----------------------------------------------------------------------===//
3568+
3569+
def InvariantGroupOp
3570+
: CIR_Op<"invariant_group", [Pure, SameOperandsAndResultType]> {
3571+
let summary = "Start an invariant group";
3572+
let description = [{
3573+
The `cir.invariant_group` operation takes a single pointer value as argument
3574+
and returns the same pointer value with fresh [invariant group] information.
3575+
All loads and stores that access the returned pointer value are presumed by
3576+
the optimizer to load or store the same value.
3577+
3578+
[invariant group]: https://llvm.org/docs/LangRef.html#invariant-group-metadata
3579+
3580+
This operation is not emitted during CIRGen. Instead, it is created when
3581+
hoisting constant alloca operations to the entry block of a function. This
3582+
operation effectively marks the syntactic scope of the constant local
3583+
variable represented by the hoisted alloca operation, and it allows for
3584+
better LLVMIR generation with potentially more optimizations.
3585+
3586+
For example, if we have the following CIR before alloca hoisting:
3587+
3588+
```mlir
3589+
cir.func @foo() {
3590+
cir.scope {
3591+
%0 = cir.alloca !s32i : !cir.ptr<!s32i>
3592+
use(%0)
3593+
}
3594+
}
3595+
```
3596+
3597+
After alloca hoisting:
3598+
3599+
```mlir
3600+
cir.func @foo() {
3601+
%0 = cir.alloca !s32i : !cir.ptr<!s32i>
3602+
cir.scope {
3603+
%1 = cir.invariant_group %0 : !cir.ptr<!s32i>
3604+
use(%1)
3605+
}
3606+
}
3607+
```
3608+
3609+
During LLVMIR lowering, load and store operations whose pointer operand
3610+
comes from `cir.invariant_group` are lowered to corresponding LLVM
3611+
instructions with invariant group metadata attached.
3612+
}];
3613+
3614+
let arguments = (ins CIR_PointerType:$ptr);
3615+
let results = (outs CIR_PointerType:$result);
3616+
3617+
let assemblyFormat = [{
3618+
$ptr `:` type($result) attr-dict
3619+
}];
3620+
}
3621+
35653622
//===----------------------------------------------------------------------===//
35663623
// DeleteArrayOp
35673624
//===----------------------------------------------------------------------===//

clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp

+155-10
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,142 @@ struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
2828
void runOnOperation() override;
2929
};
3030

31-
static void process(cir::FuncOp func) {
31+
/// Returns true when \p op has an enclosing ancestor operation that
/// implements cir::LoopOpInterface, and false otherwise.
static bool isOpInLoop(mlir::Operation *op) {
  auto enclosingLoop = op->getParentOfType<cir::LoopOpInterface>();
  return static_cast<bool>(enclosingLoop);
}
34+
35+
/// Determines whether \p alloca represents a variable declared as the
/// condition of a while loop.
///
/// C/C++ allows the condition of a while loop to be a variable declaration:
///
///   while (const int x = foo()) { /* body... */ }
///
/// CIRGen emits the following CIR for the code above:
///
///   cir.scope {
///     %x.slot = cir.alloca !s32i [init, const]
///     cir.while {
///       %0 = cir.call @foo()
///       cir.store %0, %x.slot
///       %1 = cir.load %x.slot
///       %2 = cir.cast int_to_bool %1
///       cir.condition(%2)
///     } do {
///       // loop body goes here.
///     }
///   }
///
/// Note that %x.slot is emitted outside the cir.while operation. Ideally the
/// cir.while operation would cover this cir.alloca operation, but currently
/// CIR does not work this way. When hoisting such an alloca operation, one
/// must remove the "const" flag from it; otherwise LLVM lowering would
/// mistakenly attach invariant group metadata to the load and store
/// operations, indicating that all loads and stores across all iterations of
/// the loop observe the same value.
///
/// \returns true if some store *into* the alloca-ed slot is located directly
/// in the condition region of a cir.while operation.
static bool hasStoreToAllocaInWhileCond(cir::AllocaOp alloca) {
  mlir::Value slot = alloca.getResult();

  for (mlir::Operation *user : alloca->getUsers()) {
    auto store = mlir::dyn_cast<cir::StoreOp>(user);
    // Only stores that write *to* the slot are relevant. A store that merely
    // writes the slot's address into some other location does not modify the
    // variable and must not cause the const flag to be dropped.
    if (!store || store.getAddr() != slot)
      continue;

    auto whileOp =
        mlir::dyn_cast_if_present<cir::WhileOp>(store->getParentOp());
    // Do not give up on the first store that fails the test: keep scanning
    // the remaining users, since a later one may live in a while condition.
    if (whileOp && &whileOp.getCond() == store->getParentRegion())
      return true;
  }

  return false;
}
82+
83+
/// Adjusts a constant alloca right before it is hoisted so that the invariant
/// group information emitted by LLVM lowering remains correct.
///
/// When optimization is enabled, LLVM lowering emits invariant group metadata
/// for loads and stores to alloca-ed objects with the "const" attribute. For
/// example, the following CIR:
///
///   %slot = cir.alloca !s32i [init, const]
///   cir.store %0, %slot
///   %1 = cir.load %slot
///
/// is lowered to the following LLVM IR:
///
///   %slot = alloca i32, i64 1
///   store i32 %0, ptr %slot, !invariant.group !0
///   %1 = load i32, ptr %slot, !invariant.group !0
///
/// The invariant group metadata tells the LLVM optimizer that the store and
/// the load instruction store and load the same value from %slot.
///
/// Things get tricky when such an alloca operation appears in the body of a
/// loop construct:
///
///   cir.some_loop_construct {
///     %slot = cir.alloca !s32i [init, const]
///     cir.store %0, %slot
///     %1 = cir.load %slot
///   }
///
/// Plain alloca hoisting would transform the code above into:
///
///   %slot = cir.alloca !s32i [init, const]
///   cir.some_loop_construct {
///     cir.store %0, %slot
///     %1 = cir.load %slot
///   }
///
/// which changes the semantics of the program: the transformed code tells the
/// optimizer that the load and store operations observe the same value
/// **across all iterations of the loop**.
///
/// To overcome this problem, the program is instead transformed into:
///
///   %slot = cir.alloca !s32i [init, const]
///   cir.some_loop_construct {
///     %slot.inv = cir.invariant_group %slot
///     cir.store %0, %slot.inv
///     %1 = cir.load %slot.inv
///   }
///
/// The cir.invariant_group operation attaches fresh invariant information to
/// the operand pointer and yields a pointer carrying that fresh information.
/// Upon each loop iteration the old invariant information is discarded and
/// new invariant information is attached, so the correct program semantics
/// are retained. During LLVM lowering, the cir.invariant_group operation
/// eventually becomes a call to the @llvm.launder.invariant.group intrinsic.
static void processConstAlloca(cir::AllocaOp alloca) {
  if (isOpInLoop(alloca)) {
    // Mark the alloca-ed pointer as invariant via the cir.invariant_group
    // operation. The builder inserts right before the alloca, i.e. at the
    // position the alloca occupies prior to being hoisted.
    mlir::OpBuilder builder(alloca);
    auto invariantGroupOp =
        builder.create<cir::InvariantGroupOp>(alloca.getLoc(), alloca);

    // Redirect every use of the original alloca-ed pointer to the marked
    // pointer (which carries invariant group information), except for the
    // use by the cir.invariant_group operation itself.
    alloca.getResult().replaceAllUsesExcept(invariantGroupOp.getResult(),
                                            invariantGroupOp.getOperation());
    return;
  }

  if (hasStoreToAllocaInWhileCond(alloca)) {
    // The alloca represents a variable declared as the condition of a while
    // loop. In CIR, the alloca is emitted at a scope outside of the while
    // loop. The constant flag has to be removed during hoisting, otherwise
    // the optimizer would be told that the alloca-ed value is constant across
    // all iterations of the while loop.
    //
    // See hasStoreToAllocaInWhileCond for more details.
    alloca.setConstant(false);
  }
}
165+
166+
static void process(mlir::ModuleOp mod, cir::FuncOp func) {
32167
if (func.getRegion().empty())
33168
return;
34169

@@ -47,25 +182,35 @@ static void process(cir::FuncOp func) {
47182
return;
48183

49184
mlir::Operation *insertPoint = &*entryBlock.begin();
185+
auto optInfoAttr = mlir::cast_if_present<cir::OptInfoAttr>(
186+
mod->getAttr(cir::CIRDialect::getOptInfoAttrName()));
187+
unsigned optLevel = optInfoAttr ? optInfoAttr.getLevel() : 0;
50188

51189
for (auto alloca : allocas) {
52-
alloca->moveBefore(insertPoint);
53190
if (alloca.getConstant()) {
54-
// Hoisted alloca may come from the body of a loop, in which case the
55-
// stack slot is re-used by multiple objects alive in different iterations
56-
// of the loop. In theory, each of these objects are still constant within
57-
// their lifetimes, but currently we're not emitting metadata to further
58-
// describe this. So for now let's behave conservatively and remove the
59-
// const flag on nested allocas when hoisting them.
60-
alloca.setConstant(false);
191+
if (optLevel == 0) {
192+
// Under non-optimized builds, just remove the constant flag.
193+
alloca.setConstant(false);
194+
continue;
195+
}
196+
197+
processConstAlloca(alloca);
61198
}
199+
200+
alloca->moveBefore(insertPoint);
62201
}
63202
}
64203

65204
void HoistAllocasPass::runOnOperation() {
66205
llvm::TimeTraceScope scope("Hoist Allocas");
67206
llvm::SmallVector<Operation *, 16> ops;
68-
getOperation()->walk([&](cir::FuncOp op) { process(op); });
207+
208+
Operation *op = getOperation();
209+
auto mod = mlir::dyn_cast<mlir::ModuleOp>(op);
210+
if (!mod)
211+
mod = op->getParentOfType<mlir::ModuleOp>();
212+
213+
getOperation()->walk([&](cir::FuncOp op) { process(mod, op); });
69214
}
70215

71216
} // namespace

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

+29-13
Original file line numberDiff line numberDiff line change
@@ -1649,6 +1649,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> &memorder) {
16491649
llvm_unreachable("unknown memory order");
16501650
}
16511651

1652+
/// Decides whether a load from or a store to \p addr should carry invariant
/// group metadata.
///
/// This is the case when \p addr is produced by a cir.alloca whose constant
/// flag is set, or by a cir.invariant_group operation. Values without a
/// defining operation, and values produced by any other operation, are not
/// treated as invariant.
static bool isLoadOrStoreInvariant(mlir::Value addr) {
  mlir::Operation *producer = addr.getDefiningOp();
  if (!producer)
    return false;

  if (auto allocaOp = mlir::dyn_cast<cir::AllocaOp>(producer))
    return allocaOp.getConstant();

  return mlir::isa<cir::InvariantGroupOp>(producer);
}
1660+
16521661
mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
16531662
cir::LoadOp op, OpAdaptor adaptor,
16541663
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1668,12 +1677,8 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
16681677
auto invariant = false;
16691678
// Under -O1 or higher optimization levels, add the invariant metadata if the
16701679
// load operation loads from a constant object.
1671-
if (lowerMod &&
1672-
lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
1673-
auto addrAllocaOp =
1674-
mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
1675-
invariant = addrAllocaOp && addrAllocaOp.getConstant();
1676-
}
1680+
if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
1681+
invariant = isLoadOrStoreInvariant(op.getAddr());
16771682

16781683
// TODO: nontemporal, syncscope.
16791684
auto newLoad = rewriter.create<mlir::LLVM::LoadOp>(
@@ -1708,12 +1713,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
17081713
auto invariant = false;
17091714
// Under -O1 or higher optimization levels, add the invariant metadata if the
17101715
// store operation stores to a constant object.
1711-
if (lowerMod &&
1712-
lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
1713-
auto addrAllocaOp =
1714-
mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
1715-
invariant = addrAllocaOp && addrAllocaOp.getConstant();
1716-
}
1716+
if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
1717+
invariant = isLoadOrStoreInvariant(op.getAddr());
17171718

17181719
// Convert adapted value to its memory type if needed.
17191720
mlir::Value value = emitToMemory(rewriter, dataLayout,
@@ -3700,6 +3701,20 @@ mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite(
37003701
return mlir::success();
37013702
}
37023703

3704+
/// Lowers cir.invariant_group.
///
/// With a lowering module available and an optimization level above zero,
/// the operation becomes an mlir::LLVM::LaunderInvariantGroupOp on the
/// converted pointer. Otherwise the operation is simply replaced by its
/// converted pointer operand. The rewrite always succeeds.
mlir::LogicalResult CIRToLLVMInvariantGroupOpLowering::matchAndRewrite(
    cir::InvariantGroupOp op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  const bool optimizing =
      lowerMod &&
      lowerMod->getContext().getCodeGenOpts().OptimizationLevel != 0;

  if (optimizing)
    rewriter.replaceOpWithNewOp<mlir::LLVM::LaunderInvariantGroupOp>(
        op, adaptor.getPtr());
  else
    rewriter.replaceOp(op, adaptor.getPtr());

  return mlir::success();
}
3717+
37033718
mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite(
37043719
cir::PrefetchOp op, OpAdaptor adaptor,
37053720
mlir::ConversionPatternRewriter &rewriter) const {
@@ -4143,7 +4158,8 @@ void populateCIRToLLVMConversionPatterns(
41434158
CIRToLLVMBaseDataMemberOpLowering,
41444159
CIRToLLVMCmpOpLowering,
41454160
CIRToLLVMDerivedDataMemberOpLowering,
4146-
CIRToLLVMGetRuntimeMemberOpLowering
4161+
CIRToLLVMGetRuntimeMemberOpLowering,
4162+
CIRToLLVMInvariantGroupOpLowering
41474163
// clang-format on
41484164
>(converter, patterns.getContext(), lowerModule);
41494165
patterns.add<

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

+15
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,21 @@ class CIRToLLVMInlineAsmOpLowering
962962
mlir::ConversionPatternRewriter &) const override;
963963
};
964964

965+
class CIRToLLVMInvariantGroupOpLowering
966+
: public mlir::OpConversionPattern<cir::InvariantGroupOp> {
967+
cir::LowerModule *lowerMod;
968+
969+
public:
970+
CIRToLLVMInvariantGroupOpLowering(const mlir::TypeConverter &typeConverter,
971+
mlir::MLIRContext *context,
972+
cir::LowerModule *lowerModule)
973+
: OpConversionPattern(typeConverter, context), lowerMod(lowerModule) {}
974+
975+
mlir::LogicalResult
976+
matchAndRewrite(cir::InvariantGroupOp op, OpAdaptor,
977+
mlir::ConversionPatternRewriter &) const override;
978+
};
979+
965980
class CIRToLLVMPrefetchOpLowering
966981
: public mlir::OpConversionPattern<cir::PrefetchOp> {
967982
public:

0 commit comments

Comments
 (0)