Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 691c91b

Browse files
committedFeb 10, 2025··
[CIR] Lower nested local constant alloca
This patch adds support for lowering local constants in nested scopes, including those in nested loops. For those constant allocas in non-loop inner scopes, this patch keeps their constant flags during alloca hoisting. LLVM lowering would correctly emit necessary invariant metadata for those allocas. For those constant allocas in a loop, this patch introduces a new operation `cir.invariant_group` that marks the beginning of the lifetime of the constant objects. This operation is put at the location of the alloca operation before hoisting them. This patch updates LLVM lowering to emit the necessary invariant metadata when loading and storing through such pointers. This patch takes care of the special case where the constant alloca represents a variable declared in the condition part of a while loop. In such a case, this patch removes the constant flag on the alloca operation when hoisting them.
1 parent f8821e8 commit 691c91b

File tree

5 files changed

+252
-23
lines changed

5 files changed

+252
-23
lines changed
 

‎clang/include/clang/CIR/Dialect/IR/CIROps.td

+57
Original file line numberDiff line numberDiff line change
@@ -3562,6 +3562,63 @@ def LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {
35623562

35633563
}
35643564

3565+
//===----------------------------------------------------------------------===//
3566+
// InvariantGroupOp
3567+
//===----------------------------------------------------------------------===//
3568+
3569+
def InvariantGroupOp
3570+
: CIR_Op<"invariant_group", [Pure, SameOperandsAndResultType]> {
3571+
let summary = "Start an invariant group";
3572+
let description = [{
3573+
The `cir.invariant_group` operation takes a single pointer value as argument
3574+
and returns the same pointer value with fresh [invariant group] information.
3575+
All loads and stores that access the returned pointer value are presumed by
3576+
the optimizer to load or store the same value.
3577+
3578+
[invariant group]: https://llvm.org/docs/LangRef.html#invariant-group-metadata
3579+
3580+
This operation is not emitted during CIRGen. Instead, it is created when
3581+
hoisting constant alloca operations to the entry block of a function. This
3582+
operation effectively marks the syntactic scope of the constant local
3583+
variable represented by the hoisted alloca operation, and it allows for
3584+
better LLVMIR generation with potentially more optimizations.
3585+
3586+
For example, if we have the following CIR before alloca hoisting:
3587+
3588+
```mlir
3589+
cir.func @foo() {
3590+
cir.scope {
3591+
%0 = cir.alloca !s32i : !cir.ptr<!s32i>
3592+
use(%0)
3593+
}
3594+
}
3595+
```
3596+
3597+
After alloca hoisting:
3598+
3599+
```mlir
3600+
cir.func @foo() {
3601+
%0 = cir.alloca !s32i : !cir.ptr<!s32i>
3602+
cir.scope {
3603+
%1 = cir.invariant_group %0 : !cir.ptr<!s32i>
3604+
use(%1)
3605+
}
3606+
}
3607+
```
3608+
3609+
During LLVMIR lowering, load and store operations whose pointer operand
3610+
comes from `cir.invariant_group` are lowered to corresponding LLVM
3611+
instructions with invariant group metadata attached.
3612+
}];
3613+
3614+
let arguments = (ins CIR_PointerType:$ptr);
3615+
let results = (outs CIR_PointerType:$result);
3616+
3617+
let assemblyFormat = [{
3618+
$ptr `:` type($result) attr-dict
3619+
}];
3620+
}
3621+
35653622
//===----------------------------------------------------------------------===//
35663623
// DeleteArrayOp
35673624
//===----------------------------------------------------------------------===//

‎clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp

+66-10
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,53 @@ struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
2828
void runOnOperation() override;
2929
};
3030

31-
static void process(cir::FuncOp func) {
31+
static bool isOpInLoop(mlir::Operation *op) {
32+
return op->getParentOfType<cir::LoopOpInterface>();
33+
}
34+
35+
static bool isWhileCondition(cir::AllocaOp alloca) {
36+
for (mlir::Operation *user : alloca->getUsers()) {
37+
if (!mlir::isa<cir::StoreOp>(user))
38+
continue;
39+
40+
auto store = mlir::cast<cir::StoreOp>(user);
41+
mlir::Operation *storeParentOp = store->getParentOp();
42+
if (!mlir::isa<cir::WhileOp>(storeParentOp))
43+
continue;
44+
45+
auto whileOp = mlir::cast<cir::WhileOp>(storeParentOp);
46+
return &whileOp.getCond() == store->getParentRegion();
47+
}
48+
49+
return false;
50+
}
51+
52+
static void processConstAlloca(cir::AllocaOp alloca) {
53+
if (isOpInLoop(alloca)) {
54+
// Mark the alloca-ed pointer as invariant via the cir.invariant_group
55+
// operation.
56+
mlir::OpBuilder builder(alloca);
57+
auto invariantGroupOp =
58+
builder.create<cir::InvariantGroupOp>(alloca.getLoc(), alloca);
59+
60+
// And replace all uses of the original alloca-ed pointer with the marked
61+
// pointer (which carries invariant group information).
62+
alloca->replaceUsesWithIf(
63+
invariantGroupOp,
64+
[op = invariantGroupOp.getOperation()](mlir::OpOperand &use) {
65+
return use.getOwner() != op;
66+
});
67+
} else if (isWhileCondition(alloca)) {
68+
// The alloca represents a variable declared as the condition of a while
69+
// loop. In CIR, the alloca would be emitted at a scope outside of the
70+
// while loop. We have to remove the constant flag during hoisting,
71+
// otherwise we would be telling the optimizer that the alloca-ed value
72+
// is constant across all iterations of the while loop.
73+
alloca.setConstant(false);
74+
}
75+
}
76+
77+
static void process(mlir::ModuleOp mod, cir::FuncOp func) {
3278
if (func.getRegion().empty())
3379
return;
3480

@@ -47,25 +93,35 @@ static void process(cir::FuncOp func) {
4793
return;
4894

4995
mlir::Operation *insertPoint = &*entryBlock.begin();
96+
auto optInfoAttr = mlir::cast_if_present<cir::OptInfoAttr>(
97+
mod->getAttr(cir::CIRDialect::getOptInfoAttrName()));
98+
unsigned optLevel = optInfoAttr ? optInfoAttr.getLevel() : 0;
5099

51100
for (auto alloca : allocas) {
52-
alloca->moveBefore(insertPoint);
53101
if (alloca.getConstant()) {
54-
// Hoisted alloca may come from the body of a loop, in which case the
55-
// stack slot is re-used by multiple objects alive in different iterations
56-
// of the loop. In theory, each of these objects are still constant within
57-
// their lifetimes, but currently we're not emitting metadata to further
58-
// describe this. So for now let's behave conservatively and remove the
59-
// const flag on nested allocas when hoisting them.
60-
alloca.setConstant(false);
102+
if (optLevel == 0) {
103+
// Under non-optimized builds, just remove the constant flag.
104+
alloca.setConstant(false);
105+
continue;
106+
}
107+
108+
processConstAlloca(alloca);
61109
}
110+
111+
alloca->moveBefore(insertPoint);
62112
}
63113
}
64114

65115
void HoistAllocasPass::runOnOperation() {
66116
llvm::TimeTraceScope scope("Hoist Allocas");
67117
llvm::SmallVector<Operation *, 16> ops;
68-
getOperation()->walk([&](cir::FuncOp op) { process(op); });
118+
119+
Operation *op = getOperation();
120+
auto mod = mlir::dyn_cast<mlir::ModuleOp>(op);
121+
if (!mod)
122+
mod = op->getParentOfType<mlir::ModuleOp>();
123+
124+
getOperation()->walk([&](cir::FuncOp op) { process(mod, op); });
69125
}
70126

71127
} // namespace

‎clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

+29-13
Original file line numberDiff line numberDiff line change
@@ -1649,6 +1649,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> &memorder) {
16491649
llvm_unreachable("unknown memory order");
16501650
}
16511651

1652+
static bool isLoadOrStoreInvariant(mlir::Value addr) {
1653+
if (auto addrAllocaOp =
1654+
mlir::dyn_cast_if_present<cir::AllocaOp>(addr.getDefiningOp()))
1655+
return addrAllocaOp.getConstant();
1656+
if (mlir::isa_and_present<cir::InvariantGroupOp>(addr.getDefiningOp()))
1657+
return true;
1658+
return false;
1659+
}
1660+
16521661
mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
16531662
cir::LoadOp op, OpAdaptor adaptor,
16541663
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1668,12 +1677,8 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
16681677
auto invariant = false;
16691678
// Under -O1 or higher optimization levels, add the invariant metadata if the
16701679
// load operation loads from a constant object.
1671-
if (lowerMod &&
1672-
lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
1673-
auto addrAllocaOp =
1674-
mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
1675-
invariant = addrAllocaOp && addrAllocaOp.getConstant();
1676-
}
1680+
if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
1681+
invariant = isLoadOrStoreInvariant(op.getAddr());
16771682

16781683
// TODO: nontemporal, syncscope.
16791684
auto newLoad = rewriter.create<mlir::LLVM::LoadOp>(
@@ -1708,12 +1713,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
17081713
auto invariant = false;
17091714
// Under -O1 or higher optimization levels, add the invariant metadata if the
17101715
// store operation stores to a constant object.
1711-
if (lowerMod &&
1712-
lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
1713-
auto addrAllocaOp =
1714-
mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
1715-
invariant = addrAllocaOp && addrAllocaOp.getConstant();
1716-
}
1716+
if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
1717+
invariant = isLoadOrStoreInvariant(op.getAddr());
17171718

17181719
// Convert adapted value to its memory type if needed.
17191720
mlir::Value value = emitToMemory(rewriter, dataLayout,
@@ -3700,6 +3701,20 @@ mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite(
37003701
return mlir::success();
37013702
}
37023703

3704+
mlir::LogicalResult CIRToLLVMInvariantGroupOpLowering::matchAndRewrite(
3705+
cir::InvariantGroupOp op, OpAdaptor adaptor,
3706+
mlir::ConversionPatternRewriter &rewriter) const {
3707+
if (!lowerMod ||
3708+
lowerMod->getContext().getCodeGenOpts().OptimizationLevel == 0) {
3709+
rewriter.replaceOp(op, adaptor.getPtr());
3710+
return mlir::success();
3711+
}
3712+
3713+
rewriter.replaceOpWithNewOp<mlir::LLVM::LaunderInvariantGroupOp>(
3714+
op, adaptor.getPtr());
3715+
return mlir::success();
3716+
}
3717+
37033718
mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite(
37043719
cir::PrefetchOp op, OpAdaptor adaptor,
37053720
mlir::ConversionPatternRewriter &rewriter) const {
@@ -4143,7 +4158,8 @@ void populateCIRToLLVMConversionPatterns(
41434158
CIRToLLVMBaseDataMemberOpLowering,
41444159
CIRToLLVMCmpOpLowering,
41454160
CIRToLLVMDerivedDataMemberOpLowering,
4146-
CIRToLLVMGetRuntimeMemberOpLowering
4161+
CIRToLLVMGetRuntimeMemberOpLowering,
4162+
CIRToLLVMInvariantGroupOpLowering
41474163
// clang-format on
41484164
>(converter, patterns.getContext(), lowerModule);
41494165
patterns.add<

‎clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

+15
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,21 @@ class CIRToLLVMInlineAsmOpLowering
962962
mlir::ConversionPatternRewriter &) const override;
963963
};
964964

965+
class CIRToLLVMInvariantGroupOpLowering
966+
: public mlir::OpConversionPattern<cir::InvariantGroupOp> {
967+
cir::LowerModule *lowerMod;
968+
969+
public:
970+
CIRToLLVMInvariantGroupOpLowering(const mlir::TypeConverter &typeConverter,
971+
mlir::MLIRContext *context,
972+
cir::LowerModule *lowerModule)
973+
: OpConversionPattern(typeConverter, context), lowerMod(lowerModule) {}
974+
975+
mlir::LogicalResult
976+
matchAndRewrite(cir::InvariantGroupOp op, OpAdaptor,
977+
mlir::ConversionPatternRewriter &) const override;
978+
};
979+
965980
class CIRToLLVMPrefetchOpLowering
966981
: public mlir::OpConversionPattern<cir::PrefetchOp> {
967982
public:

‎clang/test/CIR/CodeGen/const-alloca.cpp

+85
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
int produce_int();
77
void blackbox(const int &);
8+
void consume(int);
89

910
void local_const_int() {
1011
const int x = produce_int();
@@ -85,3 +86,87 @@ int local_const_optimize() {
8586
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#slot]])
8687
// LLVM-NEXT: ret i32 %[[#init]]
8788
// LLVM-NEXT: }
89+
90+
int local_scoped_const() {
91+
{
92+
const int x = produce_int();
93+
blackbox(x);
94+
return x;
95+
}
96+
}
97+
98+
// CIR-LABEL: @_Z18local_scoped_constv()
99+
// CIR: cir.scope {
100+
// CIR-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
101+
// CIR-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
102+
// CIR-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> tbaa([#tbaa])
103+
// CIR-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
104+
// CIR-NEXT: %[[#x_reload:]] = cir.load %[[#x_slot]] : !cir.ptr<!s32i>, !s32i tbaa([#tbaa])
105+
// CIR-NEXT: cir.store %[[#x_reload]], %[[#ret_slot:]] : !s32i, !cir.ptr<!s32i>
106+
// CIR-NEXT: %[[#ret:]] = cir.load %[[#ret_slot]] : !cir.ptr<!s32i>, !s32i
107+
// CIR-NEXT: cir.return %[[#ret]] : !s32i
108+
// CIR-NEXT: }
109+
// CIR: }
110+
111+
// LLVM-LABEL: @_Z18local_scoped_constv()
112+
// LLVM-NEXT: %[[#x_slot:]] = alloca i32, align 4
113+
// LLVM-NEXT: %[[#init:]] = tail call i32 @_Z11produce_intv()
114+
// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_slot]], align 4, !invariant.group !{{.+}}
115+
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
116+
// LLVM-NEXT: ret i32 %[[#init]]
117+
// LLVM-NEXT: }
118+
119+
void local_const_in_loop() {
120+
for (int i = 0; i < 10; ++i) {
121+
const int x = produce_int();
122+
blackbox(x);
123+
consume(x);
124+
}
125+
}
126+
127+
// CIR-LABEL: @_Z19local_const_in_loopv
128+
// CIR: cir.scope {
129+
// CIR: cir.for : cond {
130+
// CIR: } body {
131+
// CIR-NEXT: cir.scope {
132+
// CIR-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
133+
// CIR-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
134+
// CIR-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> tbaa([#tbaa])
135+
// CIR-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
136+
// CIR-NEXT: %[[#x_reload:]] = cir.load %[[#x_slot]] : !cir.ptr<!s32i>, !s32i tbaa([#tbaa])
137+
// CIR-NEXT: cir.call @_Z7consumei(%[[#x_reload]]) : (!s32i) -> ()
138+
// CIR-NEXT: }
139+
// CIR-NEXT: cir.yield
140+
// CIR-NEXT: } step {
141+
// CIR: }
142+
// CIR-NEXT: }
143+
// CIR-NEXT: cir.return
144+
// CIR-NEXT: }
145+
146+
// LLVM-LABEL: @_Z19local_const_in_loopv()
147+
// LLVM: %[[#x_ptr:]] = call ptr @llvm.launder.invariant.group.p0(ptr nonnull %1)
148+
// LLVM-NEXT: %[[#init:]] = call i32 @_Z11produce_intv()
149+
// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_ptr]], align 4, !invariant.group !{{.+}}
150+
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_ptr]])
151+
// LLVM-NEXT: call void @_Z7consumei(i32 %[[#init]])
152+
// LLVM: }
153+
154+
void local_const_in_while_condition() {
155+
while (const int x = produce_int()) {
156+
blackbox(x);
157+
}
158+
}
159+
160+
// LLVM-LABEL: @_Z30local_const_in_while_conditionv()
161+
// LLVM: %[[#x_slot:]] = alloca i32, align 4
162+
// LLVM-NEXT: %[[#init:]] = tail call i32 @_Z11produce_intv()
163+
// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_slot]], align 4
164+
// LLVM-NEXT: %[[loop_cond:.+]] = icmp eq i32 %[[#init]], 0
165+
// LLVM-NEXT: br i1 %[[loop_cond]], label %{{.+}}, label %[[loop_body:.+]]
166+
// LLVM: [[loop_body]]:
167+
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
168+
// LLVM-NEXT: %[[#next:]] = call i32 @_Z11produce_intv()
169+
// LLVM-NEXT: store i32 %[[#next]], ptr %[[#x_slot]], align 4
170+
// LLVM-NEXT: %[[cond:.+]] = icmp eq i32 %[[#next]], 0
171+
// LLVM-NEXT: br i1 %[[cond]], label %{{.+}}, label %[[loop_body]]
172+
// LLVM: }

0 commit comments

Comments
 (0)
Please sign in to comment.