Skip to content
Open
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1d6bc62
8322174: RISC-V: C2 VectorizedHashCode RVV Version
ygaevsky Jan 13, 2024
b976ca7
Removed checks for (MaxVectorSize >= 16) per @RealFYang suggestion.
ygaevsky Jan 25, 2024
7ed3d86
num_8b_elems_in_vec --> nof_vec_elems
ygaevsky Jan 25, 2024
301eed9
Merge master
ygaevsky Apr 24, 2025
10cf358
Merge branch 'master' into JDK-8322174
ygaevsky Apr 30, 2025
712cf05
Merge branch 'openjdk:master' into JDK-8322174
ygaevsky May 1, 2025
9ba2768
Fixed git rebase artifacts.
ygaevsky May 1, 2025
a64dc26
reorder instructions to make RVV instructions contiguous
ygaevsky May 5, 2025
4e9ad18
change slli+add sequence to shadd
ygaevsky May 5, 2025
6daaae6
simplified arrays_hashcode_v() to be closer to VLA and use less gener…
ygaevsky Jul 15, 2025
0c2fbee
- removed tail processing with RVV instructions as simple scalar loop…
ygaevsky Jul 15, 2025
c558db0
move vredsum_vs out of VEC_LOOP to improve performance
ygaevsky Aug 4, 2025
ffaba3d
make an experiment with lmul==1 instead of lmul==4.
ygaevsky Aug 4, 2025
bc1290e
fixed error made for previous lmul-m1 experiment
ygaevsky Aug 4, 2025
6c976c0
returned lmul==m4
ygaevsky Aug 4, 2025
da6644b
replaced vmul_vv + vadd_vv by vmadd_vv
ygaevsky Aug 5, 2025
e7fac6c
Merge master
ygaevsky Aug 5, 2025
bbcd1ec
make 'result' calculations scalar; clear vector registers only when n…
ygaevsky Aug 6, 2025
7c5f24a
make powmax calculations scalar; re-use v_tmp for sum reduction opera…
ygaevsky Aug 6, 2025
a85abe7
try m8 for grouping.
ygaevsky Aug 6, 2025
223e0a3
try m4 for grouping
ygaevsky Aug 6, 2025
424a453
try m2 for grouping
ygaevsky Aug 6, 2025
60b2d81
try m1 for grouping
ygaevsky Aug 6, 2025
e14cc8e
choose m2 as fastest per experiments
ygaevsky Aug 6, 2025
4449186
removed reservations for unused vector registers per reviewer's comme…
ygaevsky Aug 11, 2025
aaf930b
- addressed reviewer's comments/suggestions.
ygaevsky Aug 13, 2025
3fd5388
-more updates per reviewer's suggestions.
ygaevsky Aug 14, 2025
81356c2
- one more round of updates per received suggestions from reviewers.
ygaevsky Aug 15, 2025
38ae662
- minor updates requested by reviewer
ygaevsky Aug 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 133 additions & 16 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,7 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
Register tmp4, Register tmp5, Register tmp6,
BasicType eltype)
{
assert(!UseRVV, "sanity");
assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, t0, t1);

const int elsize = arrays_hashcode_elsize(eltype);
Expand Down Expand Up @@ -1989,29 +1990,145 @@ void C2_MacroAssembler::arrays_hashcode(Register ary, Register cnt, Register res
BLOCK_COMMENT("} // arrays_hashcode");
}

// Emits the RVV (vector) variant of the array hash code intrinsic. The emitted
// code folds 'cnt' elements of type 'eltype', starting at address 'ary', into
// the polynomial hash (multiplier 31) that is already in 'result'.
//
//   ary      - address of the first element; advanced by the emitted code
//   cnt      - number of elements; decremented by the emitted code
//   result   - in: initial hash value, out: final hash value
//   tmp1..3  - scratch: highest power of 31, end-of-array address, elements
//              consumed per vector iteration
//   eltype   - T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT or T_INT; anything else hits
//              ShouldNotReachHere()
//
// t0 and t1 are used as additional scratch registers. Vector registers v2, v4,
// v6 and v8 are used under an e32/m2 configuration.
void C2_MacroAssembler::arrays_hashcode_v(Register ary, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3,
BasicType eltype)
{
assert(UseRVV, "sanity");
assert(StubRoutines::riscv::arrays_hashcode_powers_of_31() != nullptr, "sanity");
assert_different_registers(ary, cnt, result, tmp1, tmp2, tmp3, t0, t1);

// The MaxVectorSize should have been set by detecting RVV max vector register
// size when check UseRVV (i.e. MaxVectorSize == VM_Version::_initial_vector_length).
// Let's use T_INT as all hashCode calculations eventually deal with ints.
// 'stride' is the number of 32-bit elements handled per vector iteration; the
// same lmul/stride formula is used by generate_arrays_hashcode_powers_of_31().
const int lmul = 2;
const int stride = MaxVectorSize / sizeof(jint) * lmul;

const int elsize_bytes = arrays_hashcode_elsize(eltype);
const int elsize_shift = exact_log2(elsize_bytes);

switch (eltype) {
case T_BOOLEAN: BLOCK_COMMENT("arrays_hashcode_v(unsigned byte) {"); break;
case T_CHAR: BLOCK_COMMENT("arrays_hashcode_v(char) {"); break;
case T_BYTE: BLOCK_COMMENT("arrays_hashcode_v(byte) {"); break;
case T_SHORT: BLOCK_COMMENT("arrays_hashcode_v(short) {"); break;
case T_INT: BLOCK_COMMENT("arrays_hashcode_v(int) {"); break;
default:
ShouldNotReachHere();
}

// Scalar scratch registers, named by role.
const Register pow31_highest = tmp1;
const Register ary_end = tmp2;
const Register consumed = tmp3;

// Vector registers, named by role.
const VectorRegister v_sum = v2;
const VectorRegister v_src = v4;
const VectorRegister v_coeffs = v6;
const VectorRegister v_tmp = v8;

// Skip the first table entry: adr_pows31 points at the second 32-bit word of
// the powers-of-31 table, and the first word is read separately below.
const address adr_pows31 = StubRoutines::riscv::arrays_hashcode_powers_of_31()
+ sizeof(jint);
Label VEC_LOOP, DONE, SCALAR_TAIL, SCALAR_TAIL_LOOP;

// NB: at this point (a) 'result' already has some value,
// (b) 'cnt' is not 0 or 1, see java code for details.

// Fewer than 'stride' elements: use the scalar loop only.
// (assumes stride is a power of two so that ~(stride - 1) masks off the
// low bits -- TODO confirm)
andi(t0, cnt, ~(stride - 1));
beqz(t0, SCALAR_TAIL);

// Select e32/m2 with x0 as the requested length, then zero the accumulator.
vsetvli(t1, x0, Assembler::e32, Assembler::m2);
vmv_v_x(v_sum, x0);

// pow31_highest = the table word just before adr_pows31 (the first table entry);
// v_coeffs      = the words starting at adr_pows31.
la(t1, ExternalAddress(adr_pows31));
lw(pow31_highest, Address(t1, -1 * sizeof(jint)));
vle32_v(v_coeffs, t1); // 31^^(stride-1)...31^^0 (assuming 'stride' elements are loaded)

// NOTE(review): 'consumed' is set once here and reused by every loop
// iteration, so the loop appears to rely on vsetvli granting exactly 'stride'
// elements whenever cnt >= stride -- confirm.
vsetvli(consumed, cnt, Assembler::e32, Assembler::m2);

bind(VEC_LOOP);
// Per iteration: v_sum = v_sum * pow31_highest + v_src * v_coeffs, and the
// scalar 'result' is scaled by the same pow31_highest.
arrays_hashcode_elload_v(v_src, v_tmp, ary, eltype);
vmul_vv(v_src, v_src, v_coeffs);
vmadd_vx(v_sum, pow31_highest, v_src);
mulw(result, result, pow31_highest);
// Advance 'ary' past the consumed elements (presumably
// ary += consumed << elsize_shift) and decrease the remaining count.
shadd(ary, consumed, ary, t0, elsize_shift);
subw(cnt, cnt, consumed);
// Keep looping while at least 'stride' elements remain.
andi(t1, cnt, ~(stride - 1));
bnez(t1, VEC_LOOP);

// Reduce the accumulator lanes to a single sum (seeded with 0 via v_tmp) and
// add it to the scalar result.
vmv_s_x(v_tmp, x0);
vredsum_vs(v_sum, v_sum, v_tmp);
vmv_x_s(t0, v_sum);
addw(result, result, t0);
beqz(cnt, DONE);

// Scalar tail: process the remaining 'cnt' elements one at a time.
bind(SCALAR_TAIL);
shadd(ary_end, cnt, ary, t0, elsize_shift);

bind(SCALAR_TAIL_LOOP);
arrays_hashcode_elload(t0, Address(ary), eltype);
slli(t1, result, 5); // optimize 31 * result
subw(result, t1, result); // with result<<5 - result
addw(result, result, t0);
addi(ary, ary, elsize_bytes);
bne(ary, ary_end, SCALAR_TAIL_LOOP);

bind(DONE);
BLOCK_COMMENT("} // arrays_hashcode_v");
}

// Returns the size in bytes of a single array element of type 'eltype'.
// Only the element types supported by the hash code intrinsic are accepted;
// any other type hits ShouldNotReachHere() (the trailing 'return -1' only
// keeps the compiler satisfied).
int C2_MacroAssembler::arrays_hashcode_elsize(BasicType eltype) {
  switch (eltype) {
    case T_BOOLEAN: return sizeof(jboolean);
    case T_BYTE:    return sizeof(jbyte);
    case T_SHORT:   return sizeof(jshort);
    case T_CHAR:    return sizeof(jchar);
    case T_INT:     return sizeof(jint);
    default:
      ShouldNotReachHere();
      return -1;
  }
}

// Emits a scalar load of one array element of type 'eltype' from 'src' into
// 'dst', picking the load instruction by element width and signedness
// (lbu/lb for 8-bit, lhu/lh for 16-bit, lw for 32-bit elements).
// Any other element type hits ShouldNotReachHere().
void C2_MacroAssembler::arrays_hashcode_elload(Register dst, Address src, BasicType eltype) {
  switch (eltype) {
    // T_BOOLEAN used as surrogate for unsigned byte
    case T_BOOLEAN: lbu(dst, src); break;
    case T_BYTE:    lb(dst, src);  break;
    case T_SHORT:   lh(dst, src);  break;
    case T_CHAR:    lhu(dst, src); break;
    case T_INT:     lw(dst, src);  break;
    default:
      ShouldNotReachHere();
  }
}

// Emits a vector load of array elements of type 'eltype' from address 'src'
// into 'vdst' as 32-bit values. Elements narrower than 32 bits are first
// loaded into 'vtmp' and then zero-extended (T_BOOLEAN, T_CHAR) or
// sign-extended (T_BYTE, T_SHORT) into 'vdst'; T_INT is loaded directly.
// 'vdst' and 'vtmp' must be different registers.
void C2_MacroAssembler::arrays_hashcode_elload_v(VectorRegister vdst,
                                                 VectorRegister vtmp,
                                                 Register src,
                                                 BasicType eltype) {
  assert_different_registers(vdst, vtmp);

  // Unsigned source types are widened with zero extension, signed ones with
  // sign extension.
  const bool zero_extend = (eltype == T_BOOLEAN) || (eltype == T_CHAR);

  switch (eltype) {
    case T_BOOLEAN:
    case T_BYTE:
      // 8-bit elements: widen by a factor of four.
      vle8_v(vtmp, src);
      if (zero_extend) {
        vzext_vf4(vdst, vtmp);
      } else {
        vsext_vf4(vdst, vtmp);
      }
      break;
    case T_CHAR:
    case T_SHORT:
      // 16-bit elements: widen by a factor of two.
      vle16_v(vtmp, src);
      if (zero_extend) {
        vzext_vf2(vdst, vtmp);
      } else {
        vsext_vf2(vdst, vtmp);
      }
      break;
    case T_INT:
      // Already 32-bit: no temporary and no widening needed.
      vle32_v(vdst, src);
      break;
    default:
      ShouldNotReachHere();
  }
}

Expand Down
8 changes: 6 additions & 2 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,15 @@
Register tmp3, Register tmp4,
Register tmp5, Register tmp6,
BasicType eltype);

// helper functions for arrays_hashcode:
// arrays_hashcode_elsize returns the element size in bytes for 'eltype';
// arrays_hashcode_elload emits a scalar load of one element from 'src' into 'dst'.
int arrays_hashcode_elsize(BasicType eltype);
void arrays_hashcode_elload(Register dst, Address src, BasicType eltype);

// RVV variant of arrays_hashcode (asserts UseRVV); needs three scratch
// registers in addition to ary/cnt/result.
void arrays_hashcode_v(Register ary, Register cnt, Register result,
Register tmp1, Register tmp2, Register tmp3,
BasicType eltype);
// helper for arrays_hashcode_v: emits a vector load of elements of type
// 'eltype' from address 'src' into 'vdst', using 'vtmp' as a staging register
// for element types that must be widened.
void arrays_hashcode_elload_v(VectorRegister vdst, VectorRegister vtmp,
Register src, BasicType eltype);

void string_equals(Register r1, Register r2,
Register result, Register cnt1);

Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -10969,6 +10969,7 @@ instruct arrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI ba
iRegLNoSp tmp3, iRegLNoSp tmp4,
iRegLNoSp tmp5, iRegLNoSp tmp6, rFlagsReg cr)
%{
predicate(!UseRVV);
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6,
USE_KILL ary, USE_KILL cnt, USE basic_type, KILL cr);
Expand Down
22 changes: 22 additions & 0 deletions src/hotspot/cpu/riscv/riscv_v.ad
Original file line number Diff line number Diff line change
Expand Up @@ -4079,6 +4079,28 @@ instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
ins_pipe(pipe_class_memory);
%}

// fast ArraysSupport.vectorizedHashCode
// RVV variant of the VectorizedHashCode match rule, selected when UseRVV is
// set. The encoding calls C2_MacroAssembler::arrays_hashcode_v, which works
// on v2, v4, v6 and v8 under an m2 configuration; v2..v9 are therefore all
// declared TEMP here. 'ary' and 'cnt' are USE_KILL because the emitted code
// modifies both registers.
instruct varrays_hashcode(iRegP_R11 ary, iRegI_R12 cnt, iRegI_R10 result, immI basic_type,
vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
vReg_V6 v6, vReg_V7 v7, vReg_V8 v8, vReg_V9 v9,
iRegLNoSp tmp1, iRegLNoSp tmp2, iRegLNoSp tmp3,
rFlagsReg cr)
%{
predicate(UseRVV);
match(Set result (VectorizedHashCode (Binary ary cnt) (Binary result basic_type)));
effect(USE_KILL ary, USE_KILL cnt, USE basic_type,
TEMP v2, TEMP v3, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9,
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

format %{ "Array HashCode array[] $ary,$cnt,$result,$basic_type -> $result // KILL all" %}
ins_encode %{
__ arrays_hashcode_v($ary$$Register, $cnt$$Register, $result$$Register,
$tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
(BasicType)$basic_type$$constant);
%}
ins_pipe(pipe_class_memory);
%}

instruct vstring_compareU_128b(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/cpu/riscv/stubDeclarations_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@
do_stub(compiler, string_indexof_linear_ul) \
do_arch_entry(riscv, compiler, string_indexof_linear_ul, \
string_indexof_linear_ul, string_indexof_linear_ul) \
do_stub(compiler, arrays_hashcode_powers_of_31) \
do_arch_entry(riscv, compiler, arrays_hashcode_powers_of_31, \
arrays_hashcode_powers_of_31, arrays_hashcode_powers_of_31) \


#define STUBGEN_FINAL_BLOBS_ARCH_DO(do_stub, \
Expand Down
22 changes: 22 additions & 0 deletions src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6581,6 +6581,24 @@ static const int64_t right_3_bits = right_n_bits(3);
return start;
}

// Emits a read-only table of 32-bit powers of 31 into the code buffer and
// returns the address of its first word. The table holds stride + 1 entries
// in descending order: 31^stride, 31^(stride - 1), ..., 31^0, each computed
// with Java int (wrap-around) multiplication. 'lmul' and 'stride' use the
// same formula as C2_MacroAssembler::arrays_hashcode_v, which reads this table.
address generate_arrays_hashcode_powers_of_31() {
  assert(UseRVV, "sanity");
  const int lmul = 2;
  const int stride = MaxVectorSize / sizeof(jint) * lmul;
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "arrays_hashcode_powers_of_31");
  address start = __ pc();

  // Highest exponent first, down to 31^0 == 1.
  for (int exponent = stride; exponent >= 0; --exponent) {
    jint value = 1;
    for (int k = 0; k < exponent; ++k) {
      value = java_multiply(value, 31);
    }
    __ emit_int32(value);
  }

  return start;
}

#endif // COMPILER2

/**
Expand Down Expand Up @@ -6775,6 +6793,10 @@ static const int64_t right_3_bits = right_n_bits(3);
StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
}

if (UseVectorizedHashCodeIntrinsic && UseRVV) {
StubRoutines::riscv::_arrays_hashcode_powers_of_31 = generate_arrays_hashcode_powers_of_31();
}

if (UseSHA256Intrinsics) {
Sha2Generator sha2(_masm, this);
StubRoutines::_sha256_implCompress = sha2.generate_sha256_implCompress(StubId::stubgen_sha256_implCompress_id);
Expand Down