Skip to content

8356708: C2: loop strip mining expansion doesn't take sunk stores into account #25929

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: jdk25
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 140 additions & 25 deletions src/hotspot/share/opto/loopnode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,19 +298,52 @@ IdealLoopTree* PhaseIdealLoop::insert_outer_loop(IdealLoopTree* loop, LoopNode*
return outer_ilt;
}

// Create a skeleton strip mined outer loop: a Loop head before the
// inner strip mined loop, a safepoint and an exit condition guarded
// by an opaque node after the inner strip mined loop with a backedge
// to the loop head. The inner strip mined loop is left as it is. Only
// once loop optimizations are over, do we adjust the inner loop exit
// condition to limit its number of iterations, set the outer loop
// exit condition and add Phis to the outer loop head. Some loop
// optimizations that operate on the inner strip mined loop need to be
// aware of the outer strip mined loop: loop unswitching needs to
// clone the outer loop as well as the inner, unrolling needs to only
// clone the inner loop etc. No optimizations need to change the outer
// strip mined loop as it is only a skeleton.
IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
// Create a skeleton strip mined outer loop: an OuterStripMinedLoop head before the inner strip mined CountedLoop, a
// SafePoint on exit of the inner CountedLoopEnd and an OuterStripMinedLoopEnd test that can't constant fold until loop
// optimizations are over. The inner strip mined loop is left as it is. Only once loop optimizations are over, do we
// adjust the inner loop exit condition to limit its number of iterations, set the outer loop exit condition and add
// Phis to the outer loop head. Some loop optimizations that operate on the inner strip mined loop need to be aware of
// the outer strip mined loop: loop unswitching needs to clone the outer loop as well as the inner, unrolling needs to
// only clone the inner loop etc. No optimizations need to change the outer strip mined loop as it is only a skeleton.
//
// Schematically:
//
// OuterStripMinedLoop -------|
// | |
// CountedLoop ----------- | |
// \- Phi (iv) -| | |
// / \ | | |
// CmpI AddI --| | |
// \ | |
// Bool | |
// \ | |
// CountedLoopEnd | |
// / \ | |
// IfFalse IfTrue--------| |
// | |
// SafePoint |
// | |
// OuterStripMinedLoopEnd |
// / \ |
// IfFalse IfTrue-----------|
// |
//
//
// As loop optimizations transform the inner loop, the outer strip mined loop stays mostly unchanged. The only exception
// is nodes referenced from the SafePoint and sunk from the inner loop: they end up in the outer strip mined loop.
//
// Not adding Phis to the outer loop head from the beginning, and only adding them after loop optimizations does not
// conform to C2's IR rules: any variable or memory slice that is mutated in a loop should have a Phi. The main
// motivation for such a design that doesn't conform to C2's IR rules is to allow existing loop optimizations to be
// mostly unaffected by the outer strip mined loop: the only extra step needed in most cases is to step over the
// OuterStripMinedLoop. The main drawback is that once loop optimizations are over, an extra step is needed to finish
// constructing the outer loop. This is handled by OuterStripMinedLoopNode::adjust_strip_mined_loop().
//
// Adding Phis to the outer loop is largely straightforward: there needs to be one Phi in the outer loop for every Phi
// in the inner loop. Things may be more complicated for sunk Store nodes: there may not be any inner loop Phi left
// after sinking for a particular memory slice but the outer loop needs a Phi. See
// OuterStripMinedLoopNode::handle_sunk_stores_when_finishing_construction()
IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop(Node* init_control,
IdealLoopTree* loop, float cl_prob, float le_fcnt,
Node*& entry_control, Node*& iffalse) {
Node* outer_test = intcon(0);
Expand Down Expand Up @@ -2255,9 +2288,8 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*&loop, BasicType iv_
is_deleteable_safept(sfpt);
IdealLoopTree* outer_ilt = nullptr;
if (strip_mine_loop) {
outer_ilt = create_outer_strip_mined_loop(test, cmp, init_control, loop,
cl_prob, le->_fcnt, entry_control,
iffalse);
outer_ilt = create_outer_strip_mined_loop(init_control, loop, cl_prob, le->_fcnt,
entry_control, iffalse);
}

// Now setup a new CountedLoopNode to replace the existing LoopNode
Expand Down Expand Up @@ -2870,10 +2902,11 @@ BaseCountedLoopNode* BaseCountedLoopNode::make(Node* entry, Node* backedge, Basi
return new LongCountedLoopNode(entry, backedge);
}

void OuterStripMinedLoopNode::fix_sunk_stores(CountedLoopEndNode* inner_cle, LoopNode* inner_cl, PhaseIterGVN* igvn,
PhaseIdealLoop* iloop) {
Node* cle_out = inner_cle->proj_out(false);
Node* cle_tail = inner_cle->proj_out(true);
void OuterStripMinedLoopNode::fix_sunk_stores_when_back_to_counted_loop(PhaseIterGVN* igvn,
PhaseIdealLoop* iloop) const {
CountedLoopNode* inner_cl = inner_counted_loop();
IfFalseNode* cle_out = inner_loop_exit();

if (cle_out->outcnt() > 1) {
// Look for chains of stores that were sunk
// out of the inner loop and are in the outer loop
Expand Down Expand Up @@ -2988,11 +3021,90 @@ void OuterStripMinedLoopNode::fix_sunk_stores(CountedLoopEndNode* inner_cle, Loo
}
}

// The outer strip mined loop is initially only partially constructed. In particular Phis are omitted.
// See comment above: PhaseIdealLoop::create_outer_strip_mined_loop()
// We're now in the process of finishing the construction of the outer loop. For each Phi in the inner loop, a Phi in
// the outer loop was just now created. However, Sunk Stores cause an extra challenge:
// 1) If all Stores in the inner loop were sunk for a particular memory slice, there's no Phi left for that memory slice
// in the inner loop anymore, and hence we did not yet add a Phi for the outer loop. So an extra Phi must now be
// added for each chain of sunk Stores for a particular memory slice.
// 2) If some Stores were sunk and some left in the inner loop, a Phi was already created in the outer loop but
// its backedge input wasn't wired correctly to the last Store of the chain: the backedge input was set to the
// backedge of the inner loop Phi instead, but it needs to be the last Store of the chain in the outer loop. We now
// have to fix that too.
void OuterStripMinedLoopNode::handle_sunk_stores_when_finishing_construction(PhaseIterGVN* igvn) {
IfFalseNode* cle_exit_proj = inner_loop_exit();

// Find Sunk stores: Sunk stores are pinned on the loop exit projection of the inner loop. Indeed, because Sunk Stores
// modify the memory state captured by the SafePoint in the outer strip mined loop, they must be above it. The
// SafePoint's control input is the loop exit projection. It's also the only control out of the inner loop above the
// SafePoint.
#ifdef ASSERT
int stores_in_outer_loop_cnt = 0;
for (DUIterator_Fast imax, i = cle_exit_proj->fast_outs(imax); i < imax; i++) {
Node* u = cle_exit_proj->fast_out(i);
if (u->is_Store()) {
stores_in_outer_loop_cnt++;
}
}
#endif

// Sunk stores are reachable from the memory state of the outer loop safepoint
SafePointNode* safepoint = outer_safepoint();
MergeMemNode* mm = safepoint->in(TypeFunc::Memory)->isa_MergeMem();
if (mm == nullptr) {
// There is no MergeMem, which should only happen if there was no memory node
// sunk out of the loop.
assert(stores_in_outer_loop_cnt == 0, "inconsistent");
return;
}
DEBUG_ONLY(int stores_in_outer_loop_cnt2 = 0);
for (MergeMemStream mms(mm); mms.next_non_empty();) {
Node* mem = mms.memory();
// Traverse up the chain of stores to find the first store pinned
// at the loop exit projection.
Node* last = mem;
Node* first = nullptr;
while (mem->is_Store() && mem->in(0) == cle_exit_proj) {
DEBUG_ONLY(stores_in_outer_loop_cnt2++);
first = mem;
mem = mem->in(MemNode::Memory);
}
if (first != nullptr) {
// Found a chain of Stores that were sunk
// Do we already have a memory Phi for that slice on the outer loop? If that is the case, that Phi was created
// by cloning an inner loop Phi. The inner loop Phi should have mem, the memory state of the first Store out of
// the inner loop, as input on the backedge. So does the outer loop Phi given it's a clone.
Node* phi = nullptr;
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
Node* u = mem->fast_out(i);
if (u->is_Phi() && u->in(0) == this && u->in(LoopBackControl) == mem) {
assert(phi == nullptr, "there should be only one");
phi = u;
PRODUCT_ONLY(break);
}
}
if (phi == nullptr) {
// No outer loop Phi? create one
phi = PhiNode::make(this, last);
phi->set_req(EntryControl, mem);
phi = igvn->transform(phi);
igvn->replace_input_of(first, MemNode::Memory, phi);
} else {
// Fix memory state along the backedge: it should be the last sunk Store of the chain
igvn->replace_input_of(phi, LoopBackControl, last);
}
}
}
assert(stores_in_outer_loop_cnt == stores_in_outer_loop_cnt2, "inconsistent");
}

void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
verify_strip_mined(1);
// Look for the outer & inner strip mined loop, reduce number of
// iterations of the inner loop, set exit condition of outer loop,
// construct required phi nodes for outer loop.
CountedLoopNode* inner_cl = unique_ctrl_out()->as_CountedLoop();
CountedLoopNode* inner_cl = inner_counted_loop();
assert(inner_cl->is_strip_mined(), "inner loop should be strip mined");
if (LoopStripMiningIter == 0) {
remove_outer_loop_and_safepoint(igvn);
Expand All @@ -3010,7 +3122,7 @@ void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
inner_cl->clear_strip_mined();
return;
}
CountedLoopEndNode* inner_cle = inner_cl->loopexit();
CountedLoopEndNode* inner_cle = inner_counted_loop_end();

int stride = inner_cl->stride_con();
// For a min int stride, LoopStripMiningIter * stride overflows the int range for all values of LoopStripMiningIter
Expand Down Expand Up @@ -3091,8 +3203,9 @@ void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
}

Node* iv_phi = nullptr;
// Make a clone of each phi in the inner loop
// for the outer loop
// Make a clone of each phi in the inner loop for the outer loop
// When Stores were Sunk, after this step, a Phi may still be missing or its backedge incorrectly wired. See
// handle_sunk_stores_when_finishing_construction()
for (uint i = 0; i < inner_cl->outcnt(); i++) {
Node* u = inner_cl->raw_out(i);
if (u->is_Phi()) {
Expand All @@ -3111,6 +3224,8 @@ void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
}
}

handle_sunk_stores_when_finishing_construction(igvn);

if (iv_phi != nullptr) {
// Now adjust the inner loop's exit condition
Node* limit = inner_cl->limit();
Expand Down Expand Up @@ -3166,7 +3281,7 @@ void OuterStripMinedLoopNode::transform_to_counted_loop(PhaseIterGVN* igvn, Phas
CountedLoopEndNode* inner_cle = inner_cl->loopexit();
Node* safepoint = outer_safepoint();

fix_sunk_stores(inner_cle, inner_cl, igvn, iloop);
fix_sunk_stores_when_back_to_counted_loop(igvn, iloop);

// make counted loop exit test always fail
ConINode* zero = igvn->intcon(0);
Expand Down
9 changes: 7 additions & 2 deletions src/hotspot/share/opto/loopnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,8 @@ class LoopLimitNode : public Node {
// Support for strip mining
class OuterStripMinedLoopNode : public LoopNode {
private:
static void fix_sunk_stores(CountedLoopEndNode* inner_cle, LoopNode* inner_cl, PhaseIterGVN* igvn, PhaseIdealLoop* iloop);
void fix_sunk_stores_when_back_to_counted_loop(PhaseIterGVN* igvn, PhaseIdealLoop* iloop) const;
void handle_sunk_stores_when_finishing_construction(PhaseIterGVN* igvn);

public:
OuterStripMinedLoopNode(Compile* C, Node *entry, Node *backedge)
Expand All @@ -589,6 +590,10 @@ class OuterStripMinedLoopNode : public LoopNode {
virtual OuterStripMinedLoopEndNode* outer_loop_end() const;
virtual IfFalseNode* outer_loop_exit() const;
virtual SafePointNode* outer_safepoint() const;
CountedLoopNode* inner_counted_loop() const { return unique_ctrl_out()->as_CountedLoop(); }
CountedLoopEndNode* inner_counted_loop_end() const { return inner_counted_loop()->loopexit(); }
IfFalseNode* inner_loop_exit() const { return inner_counted_loop_end()->proj_out(false)->as_IfFalse(); }

void adjust_strip_mined_loop(PhaseIterGVN* igvn);

void remove_outer_loop_and_safepoint(PhaseIterGVN* igvn) const;
Expand Down Expand Up @@ -1293,7 +1298,7 @@ class PhaseIdealLoop : public PhaseTransform {
void add_parse_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt);
SafePointNode* find_safepoint(Node* back_control, Node* x, IdealLoopTree* loop);
IdealLoopTree* insert_outer_loop(IdealLoopTree* loop, LoopNode* outer_l, Node* outer_ift);
IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
IdealLoopTree* create_outer_strip_mined_loop(Node* init_control,
IdealLoopTree* loop, float cl_prob, float le_fcnt,
Node*& entry_control, Node*& iffalse);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Copyright (c) 2025, Red Hat, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

/**
* @test
* @bug 8356708
* @summary C2: loop strip mining expansion doesn't take sunk stores into account
*
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
* -XX:+UnlockDiagnosticVMOptions -XX:+StressGCM -XX:StressSeed=26601954 TestStoresSunkInOuterStripMinedLoop
* @run main/othervm -XX:-TieredCompilation -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:LoopMaxUnroll=0
* -XX:+UnlockDiagnosticVMOptions -XX:+StressGCM TestStoresSunkInOuterStripMinedLoop
* @run main TestStoresSunkInOuterStripMinedLoop
*
*/

public class TestStoresSunkInOuterStripMinedLoop {
private static int field;
private static volatile int volatileField;

public static void main(String[] args) {
A a1 = new A();
A a2 = new A();
A a3 = new A();
for (int i = 0; i < 20_000; i++) {
field = 0;
test1();
if (field != 1500) {
throw new RuntimeException(field + " != 1500");
}
a1.field = 0;
test2(a1, a2);
if (a1.field != 1500) {
throw new RuntimeException(a1.field + " != 1500");
}
a1.field = 0;
test3(a1, a2);
if (a1.field != 1500) {
throw new RuntimeException(a1.field + " != 1500");
}
a1.field = 0;
test4(a1, a2, a3);
if (a1.field != 1500) {
throw new RuntimeException(a1.field + " != 1500");
}
}
}

// Single store sunk in outer loop, no store in inner loop
private static float test1() {
int v = field;
float f = 1;
for (int i = 0; i < 1500; i++) {
f *= 2;
v++;
field = v;
}
return f;
}

// Multiple stores sunk in outer loop, no store in inner loop
private static float test2(A a1, A a2) {
field = a1.field + a2.field;
volatileField = 42;
int v = a1.field;
float f = 1;
for (int i = 0; i < 1500; i++) {
f *= 2;
v++;
a1.field = v;
a2.field = v;
}
return f;
}

// Store sunk in outer loop, store in inner loop
private static float test3(A a1, A a2) {
field = a1.field + a2.field;
volatileField = 42;
int v = a1.field;
float f = 1;
A a = a2;
for (int i = 0; i < 1500; i++) {
f *= 2;
v++;
a.field = v;
a = a1;
a2.field = v;
}
return f;
}

// Multiple stores sunk in outer loop, store in inner loop
private static float test4(A a1, A a2, A a3) {
field = a1.field + a2.field + a3.field;
volatileField = 42;
int v = a1.field;
float f = 1;
A a = a2;
for (int i = 0; i < 1500; i++) {
f *= 2;
v++;
a.field = v;
a = a1;
a2.field = v;
a3.field = v;
}
return f;
}

static class A {
int field;
}
}