Skip to content

Commit 5a35962

Browse files
Merge tag 'jdk-24+8' into labsjdk/automation-7-25-2024-746
Added tag jdk-24+8 for changeset 0898ab7
2 parents fee9769 + 0898ab7 commit 5a35962

File tree

249 files changed

+5389
-1919
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

249 files changed

+5389
-1919
lines changed

src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -35,8 +35,6 @@
3535
typedef ByteSize (*OffsetFunction)(uint);
3636

3737
class InterpreterMacroAssembler: public MacroAssembler {
38-
protected:
39-
4038
protected:
4139
// Interpreter specific version of call_VM_base
4240
using MacroAssembler::call_VM_leaf_base;
@@ -112,8 +110,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
112110

113111
void get_dispatch();
114112

115-
// Helpers for runtime call arguments/results
116-
117113
// Helpers for runtime call arguments/results
118114
void get_method(Register reg) {
119115
ldr(reg, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
@@ -181,7 +177,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
181177
void load_ptr(int n, Register val);
182178
void store_ptr(int n, Register val);
183179

184-
// Load float value from 'address'. The value is loaded onto the FPU register v0.
180+
// Load float value from 'address'. The value is loaded onto the FPU register v0.
185181
void load_float(Address src);
186182
void load_double(Address src);
187183

src/hotspot/cpu/riscv/assembler_riscv.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,10 @@ enum VectorMask {
14151415
INSN(vredmaxu_vs, 0b1010111, 0b010, 0b000110);
14161416
INSN(vredmax_vs, 0b1010111, 0b010, 0b000111);
14171417

1418+
// Vector Widening Integer Reduction Instructions
1419+
INSN(vwredsum_vs, 0b1010111, 0b000, 0b110001);
1420+
INSN(vwredsumu_vs, 0b1010111, 0b000, 0b110000);
1421+
14181422
// Vector Floating-Point Compare Instructions
14191423
INSN(vmfle_vv, 0b1010111, 0b001, 0b011001);
14201424
INSN(vmflt_vv, 0b1010111, 0b001, 0b011011);
@@ -1453,6 +1457,10 @@ enum VectorMask {
14531457
INSN(vmulh_vv, 0b1010111, 0b010, 0b100111);
14541458
INSN(vmul_vv, 0b1010111, 0b010, 0b100101);
14551459

1460+
// Vector Widening Integer Multiply Instructions
1461+
INSN(vwmul_vv, 0b1010111, 0b010, 0b111011);
1462+
INSN(vwmulu_vv, 0b1010111, 0b010, 0b111000);
1463+
14561464
// Vector Integer Min/Max Instructions
14571465
INSN(vmax_vv, 0b1010111, 0b000, 0b000111);
14581466
INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110);

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5103,6 +5103,262 @@ class StubGenerator: public StubCodeGenerator {
51035103
return (address) start;
51045104
}
51055105

5106+
// Emits the code that folds one `step`-byte chunk starting at `buff` into the
// running Adler32 sums `s1` and `s2`, then advances `buff` by `step` bytes.
//   vtable - multiplied element-wise with the loaded bytes to form the s2 terms;
//            generate_updateBytesAdler32 fills it with { step, step - 1, ..., 1 }
//   vzero  - passed as the last operand of both widening reductions;
//            generate_updateBytesAdler32 zeroes it
// Overwrites temp0, temp1, temp3, vbytes, vs1acc, vs2acc and vtemp1; temp2 and
// vtemp2 are not referenced in the body. The last vsetvli issued here selects e32/m1.
void adler32_process_bytes(Register buff, Register s1, Register s2, VectorRegister vtable,
5107+
VectorRegister vzero, VectorRegister vbytes, VectorRegister vs1acc, VectorRegister vs2acc,
5108+
Register temp0, Register temp1, Register temp2, Register temp3,
5109+
VectorRegister vtemp1, VectorRegister vtemp2, int step, Assembler::LMUL lmul) {
5110+
5111+
assert((lmul == Assembler::m4 && step == 64) ||
5112+
(lmul == Assembler::m2 && step == 32) ||
5113+
(lmul == Assembler::m1 && step == 16),
5114+
"LMUL should be aligned with step: m4 and 64, m2 and 32 or m1 and 16");
5115+
// Below is the code for calculating the Adler32 checksum with a 64-, 32- or 16-byte step. LMUL=m4, m2 or m1 is used.
5116+
// With the registers chosen in generate_updateBytesAdler32 the results are in v12, v13, ..., v22, v23. Example below is for 64-byte step case.
5117+
// We use b1, b2, ..., b64 to denote the 64 bytes loaded in each iteration.
5118+
// In non-vectorized code, we update s1 and s2 as:
5119+
// s1 <- s1 + b1
5120+
// s2 <- s2 + s1
5121+
// s1 <- s1 + b2
5122+
// s2 <- s2 + s1
5123+
// ...
5124+
// s1 <- s1 + b64
5125+
// s2 <- s2 + s1
5126+
// Putting above assignments together, we have:
5127+
// s1_new = s1 + b1 + b2 + ... + b64
5128+
// s2_new = s2 + (s1 + b1) + (s1 + b1 + b2) + ... + (s1 + b1 + b2 + ... + b64) =
5129+
// = s2 + s1 * 64 + (b1 * 64 + b2 * 63 + ... + b64 * 1) =
5130+
// = s2 + s1 * 64 + (b1, b2, ... b64) dot (64, 63, ... 1)
5131+
5132+
// temp3 keeps the requested vector length (step) for every vsetvli below
__ mv(temp3, step);
5133+
// Load data
5134+
__ vsetvli(temp0, temp3, Assembler::e8, lmul);
5135+
__ vle8_v(vbytes, buff);
5136+
__ addi(buff, buff, step);
5137+
5138+
// Upper bound reduction sum for s1_new:
5139+
// 0xFF * 64 = 0x3FC0, so:
5140+
// 1. Need to do vector-widening reduction sum
5141+
// 2. It is safe to perform sign-extension during vmv.x.s with 16-bits elements
5142+
__ vwredsumu_vs(vs1acc, vbytes, vzero);
5143+
// Multiplication for s2_new
5144+
__ vwmulu_vv(vs2acc, vtable, vbytes);
5145+
5146+
// s2 = s2 + s1 * step (the multiplication is a left shift by log2(step));
// this must use the old s1, which is only updated further below
5147+
__ slli(temp1, s1, exact_log2(step));
5148+
__ add(s2, s2, temp1);
5149+
5150+
// Summing up calculated results for s2_new
5151+
if (MaxVectorSize > 16) {
5152+
__ vsetvli(temp0, temp3, Assembler::e16, lmul);
5153+
} else {
5154+
// Half of vector-widening multiplication result is in successor of vs2acc
5155+
// group for vlen == 16, in which case we need to double vector register
5156+
// group width in order to sum all of them in the reduction
5157+
Assembler::LMUL lmulx2 = (lmul == Assembler::m1) ? Assembler::m2 :
5158+
(lmul == Assembler::m2) ? Assembler::m4 : Assembler::m8;
5159+
__ vsetvli(temp0, temp3, Assembler::e16, lmulx2);
5160+
}
5161+
// Upper bound for reduction sum:
5162+
// 0xFF * (64 + 63 + ... + 2 + 1) = 0x817E0 max for whole register group, so:
5163+
// 1. Need to do vector-widening reduction sum
5164+
// 2. It is safe to perform sign-extension during vmv.x.s with 32-bits elements
5165+
__ vwredsumu_vs(vtemp1, vs2acc, vzero);
5166+
5167+
// Extracting results for:
5168+
// s1_new
5169+
__ vmv_x_s(temp0, vs1acc);
5170+
__ add(s1, s1, temp0);
5171+
// s2_new
5172+
__ vsetvli(temp0, temp3, Assembler::e32, Assembler::m1);
5173+
__ vmv_x_s(temp1, vtemp1);
5174+
__ add(s2, s2, temp1);
5175+
}
5176+
5177+
/***
5178+
* int java.util.zip.Adler32.updateBytes(int adler, byte[] b, int off, int len)
5179+
*
5180+
* Arguments:
5181+
*
5182+
* Inputs:
5183+
* c_rarg0 - int adler
5184+
* c_rarg1 - byte* buff (b + off)
5185+
* c_rarg2 - int len
5186+
*
5187+
* Output:
5188+
* c_rarg0 - int adler result
5189+
*/
5190+
address generate_updateBytesAdler32() {
5191+
__ align(CodeEntryAlignment);
5192+
StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
5193+
address start = __ pc();
5194+
5195+
Label L_nmax, L_nmax_loop, L_nmax_loop_entry, L_by16, L_by16_loop,
5196+
L_by16_loop_unroll, L_by1_loop, L_do_mod, L_combine, L_by1;
5197+
5198+
// Aliases
// Note: adler and s1 share c_rarg0, so s2 has to be extracted from adler
// before s1 is written.
5199+
Register adler = c_rarg0;
5200+
Register s1 = c_rarg0;
5201+
Register s2 = c_rarg3;
5202+
Register buff = c_rarg1;
5203+
Register len = c_rarg2;
5204+
Register nmax = c_rarg4;
5205+
Register base = c_rarg5;
5206+
Register count = c_rarg6;
5207+
Register temp0 = x28; // t3
5208+
Register temp1 = x29; // t4
5209+
Register temp2 = x30; // t5
5210+
Register temp3 = x31; // t6
5211+
5212+
VectorRegister vzero = v31;
5213+
VectorRegister vbytes = v8; // group: v8, v9, v10, v11
5214+
VectorRegister vs1acc = v12; // group: v12, v13, v14, v15
5215+
VectorRegister vs2acc = v16; // group: v16, v17, v18, v19, v20, v21, v22, v23
5216+
VectorRegister vtable_64 = v24; // group: v24, v25, v26, v27
5217+
VectorRegister vtable_32 = v4; // group: v4, v5
5218+
VectorRegister vtable_16 = v30;
5219+
VectorRegister vtemp1 = v28;
5220+
VectorRegister vtemp2 = v29;
5221+
5222+
// Max number of bytes we can process before having to take the mod
5223+
// 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
5224+
const uint64_t BASE = 0xfff1;
5225+
const uint64_t NMAX = 0x15B0;
5226+
5227+
// Loop steps (bytes consumed per iteration of the respective loop)
5228+
int step_64 = 64;
5229+
int step_32 = 32;
5230+
int step_16 = 16;
5231+
int step_1 = 1;
5232+
5233+
__ enter(); // Required for proper stackwalking of RuntimeStub frame
5234+
// temp1 = 64 is both the requested vector length and the value vid is subtracted from
__ mv(temp1, 64);
5235+
__ vsetvli(temp0, temp1, Assembler::e8, Assembler::m4);
5236+
5237+
// Generating accumulation coefficients for further calculations
5238+
// vtable_64:
5239+
__ vid_v(vtemp1);
5240+
__ vrsub_vx(vtable_64, vtemp1, temp1);
5241+
// vtable_64 group now contains { 0x40, 0x3f, 0x3e, ..., 0x3, 0x2, 0x1 }
5242+
5243+
// vtable_32:
5244+
__ mv(temp1, 32);
5245+
__ vsetvli(temp0, temp1, Assembler::e8, Assembler::m2);
5246+
__ vid_v(vtemp1);
5247+
__ vrsub_vx(vtable_32, vtemp1, temp1);
5248+
// vtable_32 group now contains { 0x20, 0x1f, 0x1e, ..., 0x3, 0x2, 0x1 }
5249+
5250+
__ vsetivli(temp0, 16, Assembler::e8, Assembler::m1);
5251+
// vtable_16:
5252+
__ mv(temp1, 16);
5253+
__ vid_v(vtemp1);
5254+
__ vrsub_vx(vtable_16, vtemp1, temp1);
5255+
// vtable_16 now contains { 0x10, 0xf, 0xe, ..., 0x3, 0x2, 0x1 }
5256+
5257+
__ vmv_v_i(vzero, 0);
5258+
5259+
__ mv(base, BASE);
5260+
__ mv(nmax, NMAX);
5261+
5262+
// s1 is initialized to the lower 16 bits of adler
5263+
// s2 is initialized to the upper 16 bits of adler
5264+
__ srliw(s2, adler, 16); // s2 = ((adler >> 16) & 0xffff)
5265+
__ zero_extend(s1, adler, 16); // s1 = (adler & 0xffff)
5266+
5267+
// The vectorized loops need at least 16 bytes for one iteration, so shorter
5268+
// inputs go straight to the byte-by-byte loop (or to L_combine when len is 0)
5269+
__ mv(temp0, step_16);
5270+
__ bgeu(len, temp0, L_nmax);
5271+
__ beqz(len, L_combine);
5272+
5273+
// Jumping to L_by1_loop; len is pre-decremented because that loop runs while len >= 0
5274+
__ sub(len, len, step_1);
5275+
__ j(L_by1_loop);
5276+
5277+
__ bind(L_nmax);
// len -= NMAX: a negative result means that fewer than NMAX bytes remain.
// count = NMAX - 16 is added back to len at L_by16.
5278+
__ sub(len, len, nmax);
5279+
__ sub(count, nmax, 16);
5280+
__ bltz(len, L_by16);
5281+
5282+
// Align L_nmax loop by 64: count = NMAX - 16 - 32 = 5504 = 86 * 64
5283+
__ bind(L_nmax_loop_entry);
5284+
__ sub(count, count, 32);
5285+
5286+
__ bind(L_nmax_loop);
5287+
adler32_process_bytes(buff, s1, s2, vtable_64, vzero,
5288+
vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3,
5289+
vtemp1, vtemp2, step_64, Assembler::m4);
5290+
__ sub(count, count, step_64);
5291+
__ bgtz(count, L_nmax_loop);
5292+
5293+
// 48 bytes (three 16-byte iterations) of the NMAX block are left: process them as 32 + 16
5294+
adler32_process_bytes(buff, s1, s2, vtable_32, vzero,
5295+
vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3,
5296+
vtemp1, vtemp2, step_32, Assembler::m2);
5297+
adler32_process_bytes(buff, s1, s2, vtable_16, vzero,
5298+
vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3,
5299+
vtemp1, vtemp2, step_16, Assembler::m1);
5300+
5301+
// s1 = s1 % BASE
5302+
__ remuw(s1, s1, base);
5303+
// s2 = s2 % BASE
5304+
__ remuw(s2, s2, base);
5305+
5306+
// Same setup as at L_nmax; go around again while at least NMAX bytes remain
__ sub(len, len, nmax);
5307+
__ sub(count, nmax, 16);
5308+
__ bgez(len, L_nmax_loop_entry);
5309+
5310+
__ bind(L_by16);
// len + count == (remaining bytes) - 16; negative means fewer than 16 bytes are left
5311+
__ add(len, len, count);
5312+
__ bltz(len, L_by1);
5313+
// Trying to unroll: use the 64-byte step while len >= 64
5314+
__ mv(temp3, step_64);
5315+
__ blt(len, temp3, L_by16_loop);
5316+
5317+
__ bind(L_by16_loop_unroll);
5318+
adler32_process_bytes(buff, s1, s2, vtable_64, vzero,
5319+
vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3,
5320+
vtemp1, vtemp2, step_64, Assembler::m4);
5321+
__ sub(len, len, step_64);
5322+
// By now the temp3 should still be 64 (adler32_process_bytes reloads it with step)
5323+
__ bge(len, temp3, L_by16_loop_unroll);
5324+
5325+
__ bind(L_by16_loop);
5326+
adler32_process_bytes(buff, s1, s2, vtable_16, vzero,
5327+
vbytes, vs1acc, vs2acc, temp0, temp1, temp2, temp3,
5328+
vtemp1, vtemp2, step_16, Assembler::m1);
5329+
__ sub(len, len, step_16);
5330+
__ bgez(len, L_by16_loop);
5331+
5332+
__ bind(L_by1);
// len + 15 == (remaining bytes) - 1; negative means nothing is left to process
5333+
__ add(len, len, 15);
5334+
__ bltz(len, L_do_mod);
5335+
5336+
__ bind(L_by1_loop);
5337+
__ lbu(temp0, Address(buff, 0));
5338+
__ addi(buff, buff, step_1);
5339+
__ add(s1, temp0, s1);
5340+
__ add(s2, s2, s1);
5341+
__ sub(len, len, step_1);
5342+
__ bgez(len, L_by1_loop);
5343+
5344+
__ bind(L_do_mod);
5345+
// s1 = s1 % BASE
5346+
__ remuw(s1, s1, base);
5347+
// s2 = s2 % BASE
5348+
__ remuw(s2, s2, base);
5349+
5350+
// Combine lower bits and higher bits
5351+
// adler = s1 | (s2 << 16)
5352+
__ bind(L_combine);
5353+
__ slli(s2, s2, 16);
5354+
__ orr(s1, s1, s2);
5355+
5356+
__ leave(); // Required for proper stackwalking of RuntimeStub frame
5357+
__ ret();
5358+
5359+
return start;
5360+
}
5361+
51065362
#endif // COMPILER2_OR_JVMCI
51075363

51085364
#ifdef COMPILER2
@@ -5746,6 +6002,10 @@ static const int64_t right_3_bits = right_n_bits(3);
57466002
StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
57476003
}
57486004

6005+
if (UseAdler32Intrinsics) {
6006+
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
6007+
}
6008+
57496009
#endif // COMPILER2_OR_JVMCI
57506010
}
57516011

src/hotspot/cpu/riscv/vm_version_riscv.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,18 @@ void VM_Version::initialize() {
239239
// as there are extra checks inside it which could disable UseRVV
240240
// in some situations.
241241

242+
// Adler32
243+
if (UseRVV) {
244+
if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
245+
FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
246+
}
247+
} else if (UseAdler32Intrinsics) {
248+
if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
249+
warning("Adler32 intrinsic requires RVV instructions (not available on this CPU).");
250+
}
251+
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
252+
}
253+
242254
// ChaCha20
243255
if (UseRVV) {
244256
if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {

src/hotspot/os/aix/os_aix.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1846,7 +1846,7 @@ bool os::remove_stack_guard_pages(char* addr, size_t size) {
18461846
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
18471847
}
18481848

1849-
void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
1849+
void os::pd_disclaim_memory(char *addr, size_t bytes) {
18501850
}
18511851

18521852
size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {

src/hotspot/os/bsd/os_bsd.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1684,7 +1684,7 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size,
16841684
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
16851685
}
16861686

1687-
void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
1687+
void os::pd_disclaim_memory(char *addr, size_t bytes) {
16881688
::madvise(addr, bytes, MADV_DONTNEED);
16891689
}
16901690

src/hotspot/os/linux/os_linux.cpp

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3035,15 +3035,10 @@ void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
30353035
}
30363036
}
30373037

3038-
void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) {
3039-
// This method works by doing an mmap over an existing mmaping and effectively discarding
3040-
// the existing pages. However it won't work for SHM-based large pages that cannot be
3041-
// uncommitted at all. We don't do anything in this case to avoid creating a segment with
3042-
// small pages on top of the SHM segment. This method always works for small pages, so we
3043-
// allow that in any case.
3044-
if (alignment_hint <= os::vm_page_size() || can_commit_large_page_memory()) {
3045-
commit_memory(addr, bytes, alignment_hint, !ExecMem);
3046-
}
3038+
// Hints to the OS that the memory is no longer needed and may be reclaimed by the OS when convenient.
3039+
// The memory will be re-acquired on touch without needing explicit recommitting.
3040+
void os::pd_disclaim_memory(char *addr, size_t bytes) {
3041+
::madvise(addr, bytes, MADV_DONTNEED);
30473042
}
30483043

30493044
size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {

src/hotspot/os/windows/os_windows.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3893,7 +3893,7 @@ bool os::unguard_memory(char* addr, size_t bytes) {
38933893
}
38943894

38953895
void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
3896-
void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) { }
3896+
void os::pd_disclaim_memory(char *addr, size_t bytes) { }
38973897

38983898
size_t os::pd_pretouch_memory(void* first, void* last, size_t page_size) {
38993899
return page_size;

0 commit comments

Comments
 (0)