@@ -28,7 +28,142 @@ struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
28
28
void runOnOperation () override ;
29
29
};
30
30
31
- static void process (cir::FuncOp func) {
31
+ static bool isOpInLoop (mlir::Operation *op) {
32
+ return op->getParentOfType <cir::LoopOpInterface>();
33
+ }
34
+
35
+ static bool hasStoreToAllocaInWhileCond (cir::AllocaOp alloca) {
36
+ // This function determines whether the given alloca operation represents
37
+ // a variable defined as a while loop's condition.
38
+ //
39
+ // Specifically, C/C++ allows the condition of a while loop be a variable
40
+ // declaration:
41
+ //
42
+ // while (const int x = foo()) { /* body... */ }
43
+ //
44
+ // CIRGen would emit the following CIR for the above code:
45
+ //
46
+ // cir.scope {
47
+ // %x.slot = cir.alloca !s32i [init, const]
48
+ // cir.while {
49
+ // %0 = cir.call @foo()
50
+ // cir.store %0, %x
51
+ // %1 = cir.load %x
52
+ // %2 = cir.cast int_to_bool %1
53
+ // cir.condition(%2)
54
+ // } do {
55
+ // // loop body goes here.
56
+ // }
57
+ // }
58
+ //
59
+ // Note that %x.slot is emitted outside the cir.while operation. Ideally, the
60
+ // cir.while operation should cover this cir.alloca operation, but currently
61
+ // CIR does not work this way. When hoisting such an alloca operation, one
62
+ // must remove the "const" flag from it, otherwise LLVM lowering code will
63
+ // mistakenly attach invariant group metadata to the load and store operations
64
+ // in the while body, indicating that all loads and stores across all
65
+ // iterations of the loop are constant.
66
+
67
+ for (mlir::Operation *user : alloca ->getUsers ()) {
68
+ if (!mlir::isa<cir::StoreOp>(user))
69
+ continue ;
70
+
71
+ auto store = mlir::cast<cir::StoreOp>(user);
72
+ mlir::Operation *storeParentOp = store->getParentOp ();
73
+ if (!mlir::isa<cir::WhileOp>(storeParentOp))
74
+ continue ;
75
+
76
+ auto whileOp = mlir::cast<cir::WhileOp>(storeParentOp);
77
+ return &whileOp.getCond () == store->getParentRegion ();
78
+ }
79
+
80
+ return false ;
81
+ }
82
+
83
+ static void processConstAlloca (cir::AllocaOp alloca) {
84
+ // When optimization is enabled, LLVM lowering would start emitting invariant
85
+ // group metadata for loads and stores to alloca-ed objects with "const"
86
+ // attribute. For example, the following CIR:
87
+ //
88
+ // %slot = cir.alloca !s32i [init, const]
89
+ // cir.store %0, %slot
90
+ // %1 = cir.load %slot
91
+ //
92
+ // would be lowered to the following LLVM IR:
93
+ //
94
+ // %slot = alloca i32, i64 1
95
+ // store i32 %0, ptr %slot, !invariant.group !0
96
+ // %1 = load i32, ptr %slot, !invariant.group !0
97
+ //
98
+ // The invariant group metadata would tell LLVM optimizer that the store and
99
+ // load instruction would store and load the same value from %slot.
100
+ //
101
+ // So far so good. Things started to get tricky when such an alloca operation
102
+ // appears in the body of a loop construct:
103
+ //
104
+ // cir.some_loop_construct {
105
+ // %slot = cir.alloca !s32i [init, const]
106
+ // cir.store %0, %slot
107
+ // %1 = cir.load %slot
108
+ // }
109
+ //
110
+ // After alloca hoisting, the CIR code above would be transformed into:
111
+ //
112
+ // %slot = cir.alloca !s32i [init, const]
113
+ // cir.some_loop_construct {
114
+ // cir.store %0, %slot
115
+ // %1 = cir.load %slot
116
+ // }
117
+ //
118
+ // Notice how alloca hoisting change the semantics of the program in such a
119
+ // case. The transformed code now indicates the optimizer that the load and
120
+ // store operations load and store the same value **across all iterations of
121
+ // the loop**!
122
+ //
123
+ // To overcome this problem, we instead transform the program into this:
124
+ //
125
+ // %slot = cir.alloca !s32i [init, const]
126
+ // cir.some_loop_construct {
127
+ // %slot.inv = cir.invariant_group %slot
128
+ // cir.store %0, %slot.inv
129
+ // %1 = cir.load %slot.inv
130
+ // }
131
+ //
132
+ // The cir.invariant_group operation attaches fresh invariant information to
133
+ // the operand pointer and yields a pointer with the fresh invariant
134
+ // information. Upon each loop iteration, the old invariant information is
135
+ // disgarded, and a new invariant information is attached, thus the correct
136
+ // program semantic retains. During LLVM lowering, the cir.invariant_group
137
+ // operation would eventually become an intrinsic call to
138
+ // @llvm.launder.invariant.group.
139
+
140
+ if (isOpInLoop (alloca )) {
141
+ // Mark the alloca-ed pointer as invariant via the cir.invariant_group
142
+ // operation.
143
+ mlir::OpBuilder builder (alloca );
144
+ auto invariantGroupOp =
145
+ builder.create <cir::InvariantGroupOp>(alloca .getLoc (), alloca );
146
+
147
+ // And replace all uses of the original alloca-ed pointer with the marked
148
+ // pointer (which carries invariant group information).
149
+ alloca ->replaceUsesWithIf (
150
+ invariantGroupOp,
151
+ [op = invariantGroupOp.getOperation ()](mlir::OpOperand &use) {
152
+ return use.getOwner () != op;
153
+ });
154
+ } else if (hasStoreToAllocaInWhileCond (alloca )) {
155
+ // The alloca represents a variable declared as the condition of a while
156
+ // loop. In CIR, the alloca would be emitted at a scope outside of the
157
+ // while loop. We have to remove the constant flag during hoisting,
158
+ // otherwise we would be telling the optimizer that the alloca-ed value
159
+ // is constant across all iterations of the while loop.
160
+ //
161
+ // See the body of the isWhileCondition function for more details.
162
+ alloca .setConstant (false );
163
+ }
164
+ }
165
+
166
+ static void process (mlir::ModuleOp mod, cir::FuncOp func) {
32
167
if (func.getRegion ().empty ())
33
168
return ;
34
169
@@ -47,25 +182,35 @@ static void process(cir::FuncOp func) {
47
182
return ;
48
183
49
184
mlir::Operation *insertPoint = &*entryBlock.begin ();
185
+ auto optInfoAttr = mlir::cast_if_present<cir::OptInfoAttr>(
186
+ mod->getAttr (cir::CIRDialect::getOptInfoAttrName ()));
187
+ unsigned optLevel = optInfoAttr ? optInfoAttr.getLevel () : 0 ;
50
188
51
189
for (auto alloca : allocas) {
  if (alloca.getConstant()) {
    if (optLevel == 0) {
      // Under non-optimized builds, just remove the constant flag.
      alloca.setConstant(false);
    } else {
      // Must run before the alloca is moved: it inspects the alloca's
      // current position and inserts at that position.
      processConstAlloca(alloca);
    }
  }

  // Hoist every collected alloca, including constant ones in non-optimized
  // builds. Skipping the move for those would leave them unhoisted.
  alloca->moveBefore(insertPoint);
}
63
202
}
64
203
65
204
void HoistAllocasPass::runOnOperation () {
66
205
llvm::TimeTraceScope scope (" Hoist Allocas" );
67
206
llvm::SmallVector<Operation *, 16 > ops;
68
- getOperation ()->walk ([&](cir::FuncOp op) { process (op); });
207
+
208
+ Operation *op = getOperation ();
209
+ auto mod = mlir::dyn_cast<mlir::ModuleOp>(op);
210
+ if (!mod)
211
+ mod = op->getParentOfType <mlir::ModuleOp>();
212
+
213
+ getOperation ()->walk ([&](cir::FuncOp op) { process (mod, op); });
69
214
}
70
215
71
216
} // namespace
0 commit comments