Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flang][OpenMP] Extend delayed privatization for omp.simd #122156

Merged
merged 1 commit into from
Jan 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2144,19 +2144,19 @@ static void genStandaloneSimd(lower::AbstractConverter &converter,
genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps,
simdReductionSyms);

// TODO: Support delayed privatization.
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/true,
/*useDelayedPrivatization=*/false, symTable);
dsp.processStep1();
enableDelayedPrivatization, symTable);
dsp.processStep1(&simdClauseOps);

mlir::omp::LoopNestOperands loopNestClauseOps;
llvm::SmallVector<const semantics::Symbol *> iv;
genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc,
loopNestClauseOps, iv);

EntryBlockArgs simdArgs;
// TODO: Add private syms and vars.
simdArgs.priv.syms = dsp.getDelayedPrivSymbols();
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;
auto simdOp =
Expand Down
6 changes: 3 additions & 3 deletions flang/test/Lower/OpenMP/order-clause.f90
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

!CHECK-LABEL: func.func @_QPsimd_order() {
subroutine simd_order
!CHECK: omp.simd order(reproducible:concurrent) {
!CHECK: omp.simd order(reproducible:concurrent) private({{.*}}) {
!$omp simd order(concurrent)
do i = 1, 10
end do
!CHECK: omp.simd order(reproducible:concurrent) {
!CHECK: omp.simd order(reproducible:concurrent) private({{.*}}) {
!$omp simd order(reproducible:concurrent)
do i = 1, 10
end do
!CHECK: omp.simd order(unconstrained:concurrent) {
!CHECK: omp.simd order(unconstrained:concurrent) private({{.*}}) {
!$omp simd order(unconstrained:concurrent)
do i = 1, 10
end do
Expand Down
32 changes: 20 additions & 12 deletions flang/test/Lower/OpenMP/parallel-private-clause.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,23 @@
! RUN: bbc --use-desc-for-alloc=false -fopenmp -emit-hlfir %s -o - \
! RUN: | FileCheck %s --check-prefix=FIRDialect

! FIRDialect: omp.private {type = private} @_QFsimd_loop_1Er_private_ref_box_heap_f32 {{.*}} alloc {
! FIRDialect: [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}r"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)
! FIRDialect: omp.yield([[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>)
! FIRDialect: } dealloc {
! FIRDialect: ^bb0([[R_DECL:%.*]]: !fir.ref<!fir.box<!fir.heap<f32>>>):
! FIRDialect: {{%.*}} = fir.load [[R_DECL]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: fir.if {{%.*}} {
! FIRDialect: [[LD:%.*]] = fir.load [[R_DECL]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
! FIRDialect: fir.freemem [[AD]] : !fir.heap<f32>
! FIRDialect: fir.store {{%.*}} to [[R_DECL]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: omp.yield
! FIRDialect: }

!FIRDialect: omp.private {type = private} @[[DERIVED_PRIVATIZER:_QFprivate_clause_derived_typeEt_private_ref_rec__QFprivate_clause_derived_typeTmy_type]] : !fir.ref<!fir.type<_QFprivate_clause_derived_typeTmy_type{t_i:i32,t_arr:!fir.array<5xi32>}>> alloc {
!FIRDialect: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QFprivate_clause_derived_typeTmy_type{t_i:i32,t_arr:!fir.array<5xi32>}>>):
!FIRDialect: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.type<_QFprivate_clause_derived_typeTmy_type{t_i:i32,t_arr:!fir.array<5xi32>}> {bindc_name = "t", pinned, uniq_name = "_QFprivate_clause_derived_typeEt"}
Expand Down Expand Up @@ -246,7 +263,6 @@ subroutine parallel_pointer()
!$omp end parallel
end subroutine parallel_pointer


!FIRDialect-LABEL: func @_QPsimple_loop_1()
subroutine simple_loop_1
integer :: i
Expand Down Expand Up @@ -354,20 +370,17 @@ subroutine simple_loop_3
! FIRDialect: omp.terminator
end subroutine


!CHECK-LABEL: func @_QPsimd_loop_1()
subroutine simd_loop_1
integer :: i
real, allocatable :: r;
! FIRDialect: [[R:%.*]] = fir.alloca !fir.box<!fir.heap<f32>> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"}
! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: [[R_DECL:%.*]]:2 = hlfir.declare [[R]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "{{.*}}r"} : (!fir.ref<!fir.box<!fir.heap<f32>>>) -> (!fir.ref<!fir.box<!fir.heap<f32>>>, !fir.ref<!fir.box<!fir.heap<f32>>>)

! FIRDialect: %[[LB:.*]] = arith.constant 1 : i32
! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32
! FIRDialect: %[[STEP:.*]] = arith.constant 1 : i32

! FIRDialect: omp.simd {
! FIRDialect: omp.simd private({{.*}}) {
! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!$OMP SIMD PRIVATE(r)
do i=1, 9
Expand All @@ -378,10 +391,5 @@ subroutine simd_loop_1
end do
!$OMP END SIMD
! FIRDialect: omp.yield
! FIRDialect: {{%.*}} = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: fir.if {{%.*}} {
! FIRDialect: [[LD:%.*]] = fir.load [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>
! FIRDialect: [[AD:%.*]] = fir.box_addr [[LD]] : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
! FIRDialect: fir.freemem [[AD]] : !fir.heap<f32>
! FIRDialect: fir.store {{%.*}} to [[R_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<f32>>>

end subroutine
28 changes: 13 additions & 15 deletions flang/test/Lower/OpenMP/simd.f90
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ subroutine simd
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK-NEXT: %[[UB:.*]] = arith.constant 9 : i32
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.simd {
! CHECK-NEXT: omp.simd private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i=1, 9
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -33,7 +33,7 @@ subroutine simd_with_if_clause(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd if(%[[COND:.*]]) {
! CHECK: omp.simd if(%[[COND:.*]]) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -52,7 +52,7 @@ subroutine simd_with_simdlen_clause(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd simdlen(2) {
! CHECK: omp.simd simdlen(2) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -72,7 +72,7 @@ subroutine simd_with_simdlen_clause_from_param(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd simdlen(2) {
! CHECK: omp.simd simdlen(2) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -92,7 +92,7 @@ subroutine simd_with_simdlen_clause_from_expr_from_param(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd simdlen(6) {
! CHECK: omp.simd simdlen(6) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -111,7 +111,7 @@ subroutine simd_with_safelen_clause(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd safelen(2) {
! CHECK: omp.simd safelen(2) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -131,7 +131,7 @@ subroutine simd_with_safelen_clause_from_expr_from_param(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd safelen(6) {
! CHECK: omp.simd safelen(6) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -150,7 +150,7 @@ subroutine simd_with_simdlen_safelen_clause(n, threshold)
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK: %[[UB:.*]] = fir.load %[[ARG_N]]#0
! CHECK: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK: omp.simd safelen(2) simdlen(1) {
! CHECK: omp.simd safelen(2) simdlen(1) private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
do i = 1, n
! CHECK: fir.store %[[I]] to %[[LOCAL:.*]]#1 : !fir.ref<i32>
Expand All @@ -171,7 +171,7 @@ subroutine simd_with_collapse_clause(n)
! CHECK: %[[LOWER_J:.*]] = arith.constant 1 : i32
! CHECK: %[[UPPER_J:.*]] = fir.load %[[PARAM_ARG:.*]] : !fir.ref<i32>
! CHECK: %[[STEP_J:.*]] = arith.constant 1 : i32
! CHECK: omp.simd {
! CHECK: omp.simd private({{.*}}) {
! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = (
! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to (
! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step (
Expand Down Expand Up @@ -235,7 +235,7 @@ subroutine simd_with_nontemporal_clause(n)
!CHECK: %[[LB:.*]] = arith.constant 1 : i32
!CHECK: %[[UB:.*]] = fir.load %{{.*}}#0 : !fir.ref<i32>
!CHECK: %[[STEP:.*]] = arith.constant 1 : i32
!CHECK: omp.simd nontemporal(%[[A_DECL]]#1, %[[C_DECL]]#1 : !fir.ref<i32>, !fir.ref<i32>) {
!CHECK: omp.simd nontemporal(%[[A_DECL]]#1, %[[C_DECL]]#1 : !fir.ref<i32>, !fir.ref<i32>) private({{.*}}) {
!CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!$OMP SIMD NONTEMPORAL(A, C)
do i = 1, n
Expand All @@ -249,16 +249,14 @@ subroutine lastprivate_with_simd

!CHECK: %[[VAR_SUM:.*]] = fir.alloca f32 {bindc_name = "sum", uniq_name = "_QFlastprivate_with_simdEsum"}
!CHECK: %[[VAR_SUM_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM]] {{.*}}
!CHECK: %[[VAR_SUM_PINNED:.*]] = fir.alloca f32 {bindc_name = "sum", pinned, uniq_name = "_QFlastprivate_with_simdEsum"}
!CHECK: %[[VAR_SUM_PINNED_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM_PINNED]] {{.*}}

implicit none
integer :: i
real :: sum


!CHECK: omp.simd {
!CHECK: omp.simd private(@_QFlastprivate_with_simdEsum_private_ref_f32 %[[VAR_SUM_DECLARE]]#0 -> %[[VAR_SUM_PINNED:.*]], @{{.*}}) {
!CHECK: omp.loop_nest (%[[ARG:.*]]) : i32 = ({{.*}} to ({{.*}}) inclusive step ({{.*}}) {
!CHECK: %[[VAR_SUM_PINNED_DECLARE:.*]]:2 = hlfir.declare %[[VAR_SUM_PINNED]] {{.*}}
!CHECK: %[[ADD_RESULT:.*]] = arith.addi {{.*}}
!CHECK: %[[ADD_RESULT_CONVERT:.*]] = fir.convert %[[ADD_RESULT]] : (i32) -> f32
!CHECK: hlfir.assign %[[ADD_RESULT_CONVERT]] to %[[VAR_SUM_PINNED_DECLARE]]#0 : f32, !fir.ref<f32>
Expand All @@ -283,7 +281,7 @@ subroutine simd_with_reduction_clause
! CHECK: %[[LB:.*]] = arith.constant 1 : i32
! CHECK-NEXT: %[[UB:.*]] = arith.constant 9 : i32
! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32
! CHECK-NEXT: omp.simd reduction(@[[REDUCER]] %[[X:.*]]#0 -> %[[X_RED:.*]] : !fir.ref<i32>) {
! CHECK-NEXT: omp.simd private({{.*}}) reduction(@[[REDUCER]] %[[X:.*]]#0 -> %[[X_RED:.*]] : !fir.ref<i32>) {
! CHECK-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!$omp simd reduction(+:x)
do i=1, 9
Expand Down
7 changes: 1 addition & 6 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5203,12 +5203,7 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
Function *F = CanonicalLoop->getFunction();

// Define where if branch should be inserted
Instruction *SplitBefore;
if (Instruction::classof(IfCond)) {
SplitBefore = dyn_cast<Instruction>(IfCond);
} else {
SplitBefore = CanonicalLoop->getPreheader()->getTerminator();
}
Instruction *SplitBefore = CanonicalLoop->getPreheader()->getTerminator();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just out of curiosity, why was this part changed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The IfCond could happen to be defined in the allocation block, where we inline the omp.private alloc regions. If we split right after IfCond, the private allocations will not dominate the else version of the loop.


// TODO: We should not rely on pass manager. Currently we use pass manager
// only for getting llvm::Loop which corresponds to given CanonicalLoopInfo
Expand Down
2 changes: 1 addition & 1 deletion mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2116,7 +2116,7 @@ void SimdOp::build(OpBuilder &builder, OperationState &state,
makeArrayAttr(ctx, clauses.alignments), clauses.ifExpr,
/*linear_vars=*/{}, /*linear_step_vars=*/{},
clauses.nontemporalVars, clauses.order, clauses.orderMod,
/*private_vars=*/{}, /*private_syms=*/nullptr,
clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms),
clauses.reductionVars,
makeDenseBoolArrayAttr(ctx, clauses.reductionByref),
makeArrayAttr(ctx, clauses.reductionSyms), clauses.safelen,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ static LogicalResult checkImplementationStatus(Operation &op) {
.Case([&](omp::SimdOp op) {
checkLinear(op, result);
checkNontemporal(op, result);
checkPrivate(op, result);
checkReduction(op, result);
})
.Case<omp::AtomicReadOp, omp::AtomicWriteOp, omp::AtomicUpdateOp,
Expand Down Expand Up @@ -2230,8 +2229,28 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();

MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*simdOp).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
collectPrivatizationDecls(simdOp, privateDecls);

for (mlir::Value privateVar : simdOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);

llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, opInst).failed())
return failure();

// Generator of the canonical loop body.
SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
Expand Down Expand Up @@ -2331,7 +2350,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
order, simdlen, safelen);

builder.restoreIP(afterIP);
return success();

return cleanupPrivateVars(builder, moduleTranslation, simdOp.getLoc(),
llvmPrivateVars, privateDecls);
}

/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
Expand Down
Loading
Loading