[CIR] Lower nested local constant alloca #1261

Merged
merged 1 commit into from Feb 17, 2025

57 changes: 57 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3563,6 +3563,63 @@ def LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {

}

//===----------------------------------------------------------------------===//
// InvariantGroupOp
//===----------------------------------------------------------------------===//

def InvariantGroupOp
: CIR_Op<"invariant_group", [Pure, SameOperandsAndResultType]> {
let summary = "Start an invariant group";
let description = [{
The `cir.invariant_group` operation takes a single pointer value as argument
and returns the same pointer value with fresh [invariant group] information.
All loads and stores that access the returned pointer value are presumed by
the optimizer to load or store the same value.

[invariant group]: https://llvm.org/docs/LangRef.html#invariant-group-metadata

This operation is not emitted during CIRGen. Instead, it is created when
hoisting constant alloca operations to the entry block of a function. It
effectively marks the syntactic scope of the constant local variable
represented by the hoisted alloca operation, which allows for better LLVM IR
generation with potentially more optimizations.

For example, if we have the following CIR before alloca hoisting:

```mlir
cir.func @foo() {
  cir.scope {
    %0 = cir.alloca !s32i : !cir.ptr<!s32i>
    use(%0)
  }
}
```

After alloca hoisting:

```mlir
cir.func @foo() {
  %0 = cir.alloca !s32i : !cir.ptr<!s32i>
  cir.scope {
    %1 = cir.invariant_group %0 : !cir.ptr<!s32i>
    use(%1)
  }
}
```

During LLVM IR lowering, load and store operations whose pointer operand
comes from `cir.invariant_group` are lowered to the corresponding LLVM
instructions with invariant group metadata attached.
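
For instance, assuming `use(%1)` above is a store followed by a load, the
hoisted example could lower to LLVM IR roughly as follows (a sketch; value
names, alignment, and metadata ids are illustrative):

```llvm
declare ptr @llvm.launder.invariant.group.p0(ptr)

define void @foo() {
  %0 = alloca i32, i64 1, align 4
  ; lowered from cir.invariant_group
  %1 = call ptr @llvm.launder.invariant.group.p0(ptr %0)
  store i32 42, ptr %1, align 4, !invariant.group !0
  %2 = load i32, ptr %1, align 4, !invariant.group !0
  ret void
}

!0 = !{}
```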
}];

let arguments = (ins CIR_PointerType:$ptr);
let results = (outs CIR_PointerType:$result);

let assemblyFormat = [{
$ptr `:` type($result) attr-dict
}];
}

//===----------------------------------------------------------------------===//
// DeleteArrayOp
//===----------------------------------------------------------------------===//
165 changes: 155 additions & 10 deletions clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp
@@ -28,7 +28,142 @@ struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
void runOnOperation() override;
};

static void process(cir::FuncOp func) {
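// Returns true if the given operation is nested inside a CIR loop construct.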
static bool isOpInLoop(mlir::Operation *op) {
return op->getParentOfType<cir::LoopOpInterface>();
}

static bool hasStoreToAllocaInWhileCond(cir::AllocaOp alloca) {
// This function determines whether the given alloca operation represents
// a variable defined as a while loop's condition.
//
// Specifically, C/C++ allows the condition of a while loop to be a variable
// declaration:
//
//   while (const int x = foo()) { /* body... */ }
//
// CIRGen would emit the following CIR for the above code:
//
//   cir.scope {
//     %x.slot = cir.alloca !s32i [init, const]
//     cir.while {
//       %0 = cir.call @foo()
//       cir.store %0, %x.slot
//       %1 = cir.load %x.slot
//       %2 = cir.cast int_to_bool %1
//       cir.condition(%2)
//     } do {
//       // loop body goes here.
//     }
//   }
//
// Note that %x.slot is emitted outside the cir.while operation. Ideally, the
// cir.while operation should cover this cir.alloca operation, but currently
// CIR does not work this way. When hoisting such an alloca operation, one
// must remove the "const" flag from it, otherwise LLVM lowering code will
// mistakenly attach invariant group metadata to the load and store operations
// in the while body, implying that they load and store the same value across
// all iterations of the loop.

for (mlir::Operation *user : alloca->getUsers()) {
if (!mlir::isa<cir::StoreOp>(user))
continue;

auto store = mlir::cast<cir::StoreOp>(user);
mlir::Operation *storeParentOp = store->getParentOp();
if (!mlir::isa<cir::WhileOp>(storeParentOp))
continue;

auto whileOp = mlir::cast<cir::WhileOp>(storeParentOp);
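// The store initializes a while-condition variable exactly when it lives in
// the while op's condition region.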
return &whileOp.getCond() == store->getParentRegion();
}

return false;
}

static void processConstAlloca(cir::AllocaOp alloca) {
// When optimization is enabled, LLVM lowering emits invariant group metadata
// for loads and stores to alloca-ed objects that carry the "const" attribute.
// For example, the following CIR:
//
//   %slot = cir.alloca !s32i [init, const]
//   cir.store %0, %slot
//   %1 = cir.load %slot
//
// would be lowered to the following LLVM IR:
//
//   %slot = alloca i32, i64 1
//   store i32 %0, ptr %slot, !invariant.group !0
//   %1 = load i32, ptr %slot, !invariant.group !0
//
// The invariant group metadata tells the LLVM optimizer that the store and
// load instructions store and load the same value from %slot.
//
// So far so good. Things get tricky when such an alloca operation appears
// in the body of a loop construct:
//
//   cir.some_loop_construct {
//     %slot = cir.alloca !s32i [init, const]
//     cir.store %0, %slot
//     %1 = cir.load %slot
//   }
//
// After alloca hoisting, the CIR code above would be transformed into:
//
//   %slot = cir.alloca !s32i [init, const]
//   cir.some_loop_construct {
//     cir.store %0, %slot
//     %1 = cir.load %slot
//   }
//
// Notice how alloca hoisting changes the semantics of the program in such a
// case. The transformed code now tells the optimizer that the load and store
// operations load and store the same value **across all iterations of the
// loop**!
//
// To overcome this problem, we instead transform the program into this:
//
//   %slot = cir.alloca !s32i [init, const]
//   cir.some_loop_construct {
//     %slot.inv = cir.invariant_group %slot
//     cir.store %0, %slot.inv
//     %1 = cir.load %slot.inv
//   }
//
// The cir.invariant_group operation attaches fresh invariant information to
// the operand pointer and yields a pointer carrying that fresh invariant
// information. On each loop iteration, the old invariant information is
// discarded and new invariant information is attached, so the correct
// program semantics are retained. During LLVM lowering, the
// cir.invariant_group operation eventually becomes an intrinsic call to
// @llvm.launder.invariant.group.

if (isOpInLoop(alloca)) {
// Mark the alloca-ed pointer as invariant via the cir.invariant_group
// operation.
mlir::OpBuilder builder(alloca);
auto invariantGroupOp =
builder.create<cir::InvariantGroupOp>(alloca.getLoc(), alloca);

// And replace all uses of the original alloca-ed pointer with the marked
// pointer (which carries invariant group information).
alloca->replaceUsesWithIf(
invariantGroupOp,
[op = invariantGroupOp.getOperation()](mlir::OpOperand &use) {
return use.getOwner() != op;
});
} else if (hasStoreToAllocaInWhileCond(alloca)) {
// The alloca represents a variable declared as the condition of a while
// loop. In CIR, such an alloca is emitted in a scope outside of the while
// loop. We have to remove the constant flag during hoisting, otherwise we
// would be telling the optimizer that the alloca-ed value is constant
// across all iterations of the while loop.
//
// See the body of the hasStoreToAllocaInWhileCond function for more details.
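//
// A sketch of the intended result for the while-condition example after
// hoisting (illustrative; names follow the example above):
//
//   %x.slot = cir.alloca !s32i [init]   // "const" removed
//   cir.scope {
//     cir.while {
//       %0 = cir.call @foo()
//       cir.store %0, %x.slot
//       %1 = cir.load %x.slot
//       %2 = cir.cast int_to_bool %1
//       cir.condition(%2)
//     } do {
//       // loop body goes here.
//     }
//   }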
alloca.setConstant(false);
}
}

static void process(mlir::ModuleOp mod, cir::FuncOp func) {
if (func.getRegion().empty())
return;

@@ -47,25 +182,35 @@ static void process(cir::FuncOp func) {
return;

mlir::Operation *insertPoint = &*entryBlock.begin();
auto optInfoAttr = mlir::cast_if_present<cir::OptInfoAttr>(
mod->getAttr(cir::CIRDialect::getOptInfoAttrName()));
unsigned optLevel = optInfoAttr ? optInfoAttr.getLevel() : 0;

for (auto alloca : allocas) {
alloca->moveBefore(insertPoint);
if (alloca.getConstant()) {
// Hoisted alloca may come from the body of a loop, in which case the
// stack slot is re-used by multiple objects alive in different iterations
// of the loop. In theory, each of these objects is still constant within
// their lifetimes, but currently we're not emitting metadata to further
// describe this. So for now let's behave conservatively and remove the
// const flag on nested allocas when hoisting them.
alloca.setConstant(false);
if (optLevel == 0) {
// Under non-optimized builds, just remove the constant flag.
alloca.setConstant(false);
continue;
}

processConstAlloca(alloca);
}

alloca->moveBefore(insertPoint);
}
}

void HoistAllocasPass::runOnOperation() {
llvm::TimeTraceScope scope("Hoist Allocas");
llvm::SmallVector<Operation *, 16> ops;
getOperation()->walk([&](cir::FuncOp op) { process(op); });

Operation *op = getOperation();
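// getOperation() may be the module itself or an operation nested within a
// module; find the enclosing ModuleOp either way so that process() can
// query the optimization level.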
auto mod = mlir::dyn_cast<mlir::ModuleOp>(op);
if (!mod)
mod = op->getParentOfType<mlir::ModuleOp>();

getOperation()->walk([&](cir::FuncOp op) { process(mod, op); });
}

} // namespace
42 changes: 29 additions & 13 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1612,6 +1612,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> &memorder) {
llvm_unreachable("unknown memory order");
}

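// Returns true when the given address is known to point to a constant
// object: either the result of an alloca marked "const", or the result of a
// cir.invariant_group marker.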
static bool isLoadOrStoreInvariant(mlir::Value addr) {
if (auto addrAllocaOp =
mlir::dyn_cast_if_present<cir::AllocaOp>(addr.getDefiningOp()))
return addrAllocaOp.getConstant();
if (mlir::isa_and_present<cir::InvariantGroupOp>(addr.getDefiningOp()))
return true;
return false;
}

mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
cir::LoadOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1631,12 +1640,8 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
auto invariant = false;
// Under -O1 or higher optimization levels, add the invariant metadata if the
// load operation loads from a constant object.
if (lowerMod &&
lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
auto addrAllocaOp =
mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
invariant = addrAllocaOp && addrAllocaOp.getConstant();
}
if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
invariant = isLoadOrStoreInvariant(op.getAddr());

// TODO: nontemporal, syncscope.
auto newLoad = rewriter.create<mlir::LLVM::LoadOp>(
@@ -1674,12 +1679,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
auto invariant = false;
// Under -O1 or higher optimization levels, add the invariant metadata if the
// store operation stores to a constant object.
if (lowerMod &&
lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
auto addrAllocaOp =
mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
invariant = addrAllocaOp && addrAllocaOp.getConstant();
}
if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
invariant = isLoadOrStoreInvariant(op.getAddr());

// Convert adapted value to its memory type if needed.
mlir::Value value = emitToMemory(rewriter, dataLayout,
@@ -3666,6 +3667,20 @@ mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite(
return mlir::success();
}

mlir::LogicalResult CIRToLLVMInvariantGroupOpLowering::matchAndRewrite(
cir::InvariantGroupOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
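// Without a LowerModule or at -O0, invariant group information carries no
// benefit; fold the marker away by simply forwarding the pointer.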
if (!lowerMod ||
lowerMod->getContext().getCodeGenOpts().OptimizationLevel == 0) {
rewriter.replaceOp(op, adaptor.getPtr());
return mlir::success();
}

rewriter.replaceOpWithNewOp<mlir::LLVM::LaunderInvariantGroupOp>(
op, adaptor.getPtr());
return mlir::success();
}

mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite(
cir::PrefetchOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -4107,7 +4122,8 @@ void populateCIRToLLVMConversionPatterns(
CIRToLLVMBaseDataMemberOpLowering,
CIRToLLVMCmpOpLowering,
CIRToLLVMDerivedDataMemberOpLowering,
CIRToLLVMGetRuntimeMemberOpLowering
CIRToLLVMGetRuntimeMemberOpLowering,
CIRToLLVMInvariantGroupOpLowering
// clang-format on
>(converter, patterns.getContext(), lowerModule);
patterns.add<
15 changes: 15 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -975,6 +975,21 @@ class CIRToLLVMInlineAsmOpLowering
mlir::ConversionPatternRewriter &) const override;
};

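/// Lowers cir.invariant_group to @llvm.launder.invariant.group when
/// optimizations are enabled, and folds it away otherwise.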
class CIRToLLVMInvariantGroupOpLowering
: public mlir::OpConversionPattern<cir::InvariantGroupOp> {
cir::LowerModule *lowerMod;

public:
CIRToLLVMInvariantGroupOpLowering(const mlir::TypeConverter &typeConverter,
mlir::MLIRContext *context,
cir::LowerModule *lowerModule)
: OpConversionPattern(typeConverter, context), lowerMod(lowerModule) {}

mlir::LogicalResult
matchAndRewrite(cir::InvariantGroupOp op, OpAdaptor,
mlir::ConversionPatternRewriter &) const override;
};

class CIRToLLVMPrefetchOpLowering
: public mlir::OpConversionPattern<cir::PrefetchOp> {
public: