Commit 4e7443d

[CIR] Lower nested local constant alloca
This patch adds support for lowering local constants in nested scopes, including those in nested loops.

For constant allocas in non-loop inner scopes, the constant flag is kept during alloca hoisting, and LLVM lowering emits the necessary invariant metadata for those allocas.

For constant allocas in a loop, this patch introduces a new operation, `cir.invariant_group`, which marks the beginning of the lifetime of the constant object. The operation is inserted at the location of the alloca before hoisting, and LLVM lowering is updated to emit the necessary invariant metadata when loading from or storing through such pointers.

This patch also handles the special case where the constant alloca represents a variable declared in the condition of a while loop. In that case, the constant flag is removed from the alloca during hoisting.
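For illustration, a minimal sketch of the simplest case covered here, a constant in a non-loop inner scope (the declarations and function name mirror the new test file added below):

    int produce_int();
    void blackbox(const int &);

    int local_scoped_const() {
      {
        // x lives in a nested scope; its alloca is hoisted to the entry block
        // with the const flag kept, so LLVM lowering can attach invariant
        // metadata to the store and loads through it.
        const int x = produce_int();
        blackbox(x);
        return x;
      }
    }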
1 parent f8821e8 commit 4e7443d

File tree

5 files changed: +191 −21 lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

+22
@@ -3562,6 +3562,28 @@ def LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {
 
 }
 
+//===----------------------------------------------------------------------===//
+// InvariantGroupOp
+//===----------------------------------------------------------------------===//
+
+def InvariantGroupOp
+    : CIR_Op<"invariant_group", [Pure, SameOperandsAndResultType]> {
+  let summary = "Start an invariant group";
+  let description = [{
+    The `cir.invariant_group` operation takes a single pointer value as argument
+    and returns the same pointer value with a fresh invariant group. All loads
+    and stores that access the returned pointer value are presumed by the
+    optimizer to load or store the same value.
+  }];
+
+  let arguments = (ins CIR_PointerType:$ptr);
+  let results = (outs CIR_PointerType:$result);
+
+  let assemblyFormat = [{
+    $ptr `:` type($result) attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // DeleteArrayOp
 //===----------------------------------------------------------------------===//
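As a hedged illustration of what the invariant-group guarantee buys at the source level (this mirrors the local_const_in_loop test added below; it is not part of the patch): within one group, a load through the returned pointer may be folded to the value previously stored through it.

    int produce_int();
    void blackbox(const int &);
    void consume(int);

    void repeated_reads() {
      for (int i = 0; i < 10; ++i) {
        const int x = produce_int(); // store tagged with this iteration's group
        blackbox(x);                 // opaque call; only sees a const reference
        consume(x);                  // the reload of x may be folded to the value
                                     // stored above, since both accesses go
                                     // through the same invariant group pointer
      }
    }

In the LLVM checks below this shows up as consume() being passed the stored value directly instead of a fresh load.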

clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp

+40 −8
@@ -28,6 +28,27 @@ struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
   void runOnOperation() override;
 };
 
+static bool isOpInLoop(mlir::Operation *op) {
+  return op->getParentOfType<cir::LoopOpInterface>();
+}
+
+static bool isWhileCondition(cir::AllocaOp alloca) {
+  for (mlir::Operation *user : alloca->getUsers()) {
+    if (!mlir::isa<cir::StoreOp>(user))
+      continue;
+
+    auto store = mlir::cast<cir::StoreOp>(user);
+    mlir::Operation *storeParentOp = store->getParentOp();
+    if (!mlir::isa<cir::WhileOp>(storeParentOp))
+      continue;
+
+    auto whileOp = mlir::cast<cir::WhileOp>(storeParentOp);
+    return &whileOp.getCond() == store->getParentRegion();
+  }
+
+  return false;
+}
+
 static void process(cir::FuncOp func) {
   if (func.getRegion().empty())
     return;
@@ -49,16 +70,27 @@
   mlir::Operation *insertPoint = &*entryBlock.begin();
 
   for (auto alloca : allocas) {
-    alloca->moveBefore(insertPoint);
     if (alloca.getConstant()) {
-      // Hoisted alloca may come from the body of a loop, in which case the
-      // stack slot is re-used by multiple objects alive in different iterations
-      // of the loop. In theory, each of these objects are still constant within
-      // their lifetimes, but currently we're not emitting metadata to further
-      // describe this. So for now let's behave conservatively and remove the
-      // const flag on nested allocas when hoisting them.
-      alloca.setConstant(false);
+      if (isOpInLoop(alloca)) {
+        mlir::OpBuilder builder(alloca);
+        auto invariantGroupOp =
+            builder.create<cir::InvariantGroupOp>(alloca.getLoc(), alloca);
+        alloca->replaceUsesWithIf(
+            invariantGroupOp,
+            [op = invariantGroupOp.getOperation()](mlir::OpOperand &use) {
+              return use.getOwner() != op;
+            });
+      } else if (isWhileCondition(alloca)) {
+        // The alloca represents a variable declared as the condition of a while
+        // loop. In CIR, the alloca would be emitted at a scope outside of the
+        // while loop. We have to remove the constant flag during hoisting,
+        // otherwise we would be telling the optimizer that the alloca-ed value
+        // is constant across all iterations of the while loop.
+        alloca.setConstant(false);
+      }
    }
+
+    alloca->moveBefore(insertPoint);
   }
 }
 
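A hedged sketch of the shape isWhileCondition() detects (it mirrors the local_const_in_while_condition test below): the store that initializes x sits in the cond region of the cir.while, so the same hoisted slot is rewritten before every iteration and must not keep its const flag.

    int produce_int();
    void blackbox(const int &);

    void local_const_in_while_condition() {
      // x is declared in the while condition: a new value is stored into the
      // hoisted slot on every iteration, so marking the alloca const would
      // wrongly claim the value is invariant across iterations.
      while (const int x = produce_int()) {
        blackbox(x);
      }
    }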

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

+29 −13
@@ -1649,6 +1649,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> &memorder) {
   llvm_unreachable("unknown memory order");
 }
 
+static bool isLoadOrStoreInvariant(mlir::Value addr) {
+  if (auto addrAllocaOp =
+          mlir::dyn_cast_if_present<cir::AllocaOp>(addr.getDefiningOp()))
+    return addrAllocaOp.getConstant();
+  if (mlir::isa_and_present<cir::InvariantGroupOp>(addr.getDefiningOp()))
+    return true;
+  return false;
+}
+
 mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
     cir::LoadOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1668,12 +1677,8 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
   auto invariant = false;
   // Under -O1 or higher optimization levels, add the invariant metadata if the
   // load operation loads from a constant object.
-  if (lowerMod &&
-      lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
-    auto addrAllocaOp =
-        mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
-    invariant = addrAllocaOp && addrAllocaOp.getConstant();
-  }
+  if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
+    invariant = isLoadOrStoreInvariant(op.getAddr());
 
   // TODO: nontemporal, syncscope.
   auto newLoad = rewriter.create<mlir::LLVM::LoadOp>(
@@ -1708,12 +1713,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
   auto invariant = false;
   // Under -O1 or higher optimization levels, add the invariant metadata if the
   // store operation stores to a constant object.
-  if (lowerMod &&
-      lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
-    auto addrAllocaOp =
-        mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
-    invariant = addrAllocaOp && addrAllocaOp.getConstant();
-  }
+  if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
+    invariant = isLoadOrStoreInvariant(op.getAddr());
 
   // Convert adapted value to its memory type if needed.
   mlir::Value value = emitToMemory(rewriter, dataLayout,
@@ -3700,6 +3701,20 @@ mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMInvariantGroupOpLowering::matchAndRewrite(
+    cir::InvariantGroupOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  if (!lowerMod ||
+      lowerMod->getContext().getCodeGenOpts().OptimizationLevel == 0) {
+    rewriter.replaceOp(op, adaptor.getPtr());
+    return mlir::success();
+  }
+
+  rewriter.replaceOpWithNewOp<mlir::LLVM::LaunderInvariantGroupOp>(
+      op, adaptor.getPtr());
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite(
     cir::PrefetchOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -4143,7 +4158,8 @@ void populateCIRToLLVMConversionPatterns(
       CIRToLLVMBaseDataMemberOpLowering,
       CIRToLLVMCmpOpLowering,
      CIRToLLVMDerivedDataMemberOpLowering,
-      CIRToLLVMGetRuntimeMemberOpLowering
+      CIRToLLVMGetRuntimeMemberOpLowering,
+      CIRToLLVMInvariantGroupOpLowering
      // clang-format on
   >(converter, patterns.getContext(), lowerModule);
   patterns.add<
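For clarity, the gate shared by the load, store, and invariant_group rewrites above can be read as the following hypothetical helper (illustration only, not part of the patch): at -O0, or when no LowerModule is available, no invariant metadata is emitted and cir.invariant_group simply folds to its input pointer.

    // Hypothetical helper restating the OptimizationLevel check that the three
    // lowerings above each write inline.
    static bool emitsInvariantInfo(cir::LowerModule *lowerMod) {
      return lowerMod &&
             lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0;
    }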

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

+15
@@ -962,6 +962,21 @@ class CIRToLLVMInlineAsmOpLowering
                   mlir::ConversionPatternRewriter &) const override;
 };
 
+class CIRToLLVMInvariantGroupOpLowering
+    : public mlir::OpConversionPattern<cir::InvariantGroupOp> {
+  cir::LowerModule *lowerMod;
+
+public:
+  CIRToLLVMInvariantGroupOpLowering(const mlir::TypeConverter &typeConverter,
+                                    mlir::MLIRContext *context,
+                                    cir::LowerModule *lowerModule)
+      : OpConversionPattern(typeConverter, context), lowerMod(lowerModule) {}
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::InvariantGroupOp op, OpAdaptor,
+                  mlir::ConversionPatternRewriter &) const override;
+};
+
 class CIRToLLVMPrefetchOpLowering
     : public mlir::OpConversionPattern<cir::PrefetchOp> {
 public:

clang/test/CIR/CodeGen/const-alloca.cpp

+85
@@ -5,6 +5,7 @@
 
 int produce_int();
 void blackbox(const int &);
+void consume(int);
 
 void local_const_int() {
   const int x = produce_int();
@@ -85,3 +86,87 @@ int local_const_optimize() {
 // LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#slot]])
 // LLVM-NEXT: ret i32 %[[#init]]
 // LLVM-NEXT: }
+
+int local_scoped_const() {
+  {
+    const int x = produce_int();
+    blackbox(x);
+    return x;
+  }
+}
+
+// CIR-LABEL: @_Z18local_scoped_constv()
+// CIR: cir.scope {
+// CIR-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
+// CIR-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> tbaa([#tbaa])
+// CIR-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
+// CIR-NEXT: %[[#x_reload:]] = cir.load %[[#x_slot]] : !cir.ptr<!s32i>, !s32i tbaa([#tbaa])
+// CIR-NEXT: cir.store %[[#x_reload]], %[[#ret_slot:]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %[[#ret:]] = cir.load %[[#ret_slot]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.return %[[#ret]] : !s32i
+// CIR-NEXT: }
+// CIR: }
+
+// LLVM-LABEL: @_Z18local_scoped_constv()
+// LLVM-NEXT: %[[#x_slot:]] = alloca i32, align 4
+// LLVM-NEXT: %[[#init:]] = tail call i32 @_Z11produce_intv()
+// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_slot]], align 4, !invariant.group !{{.+}}
+// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
+// LLVM-NEXT: ret i32 %[[#init]]
+// LLVM-NEXT: }
+
+void local_const_in_loop() {
+  for (int i = 0; i < 10; ++i) {
+    const int x = produce_int();
+    blackbox(x);
+    consume(x);
+  }
+}
+
+// CIR-LABEL: @_Z19local_const_in_loopv
+// CIR: cir.scope {
+// CIR: cir.for : cond {
+// CIR: } body {
+// CIR-NEXT: cir.scope {
+// CIR-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
+// CIR-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> tbaa([#tbaa])
+// CIR-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
+// CIR-NEXT: %[[#x_reload:]] = cir.load %[[#x_slot]] : !cir.ptr<!s32i>, !s32i tbaa([#tbaa])
+// CIR-NEXT: cir.call @_Z7consumei(%[[#x_reload]]) : (!s32i) -> ()
+// CIR-NEXT: }
+// CIR-NEXT: cir.yield
+// CIR-NEXT: } step {
+// CIR: }
+// CIR-NEXT: }
+// CIR-NEXT: cir.return
+// CIR-NEXT: }
+
+// LLVM-LABEL: @_Z19local_const_in_loopv()
+// LLVM: %[[#x_ptr:]] = call ptr @llvm.launder.invariant.group.p0(ptr nonnull %1)
+// LLVM-NEXT: %[[#init:]] = call i32 @_Z11produce_intv()
+// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_ptr]], align 4, !invariant.group !{{.+}}
+// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_ptr]])
+// LLVM-NEXT: call void @_Z7consumei(i32 %[[#init]])
+// LLVM: }
+
+void local_const_in_while_condition() {
+  while (const int x = produce_int()) {
+    blackbox(x);
+  }
+}
+
+// LLVM-LABEL: @_Z30local_const_in_while_conditionv()
+// LLVM: %[[#x_slot:]] = alloca i32, align 4
+// LLVM-NEXT: %[[#init:]] = tail call i32 @_Z11produce_intv()
+// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_slot]], align 4
+// LLVM-NEXT: %[[loop_cond:.+]] = icmp eq i32 %[[#init]], 0
+// LLVM-NEXT: br i1 %[[loop_cond]], label %{{.+}}, label %[[loop_body:.+]]
+// LLVM: [[loop_body]]:
+// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
+// LLVM-NEXT: %[[#next:]] = call i32 @_Z11produce_intv()
+// LLVM-NEXT: store i32 %[[#next]], ptr %[[#x_slot]], align 4
+// LLVM-NEXT: %[[cond:.+]] = icmp eq i32 %[[#next]], 0
+// LLVM-NEXT: br i1 %[[cond]], label %{{.+}}, label %[[loop_body]]
+// LLVM: }
