From 54113e77d8bbd34dc87a0b92047e91e45c2962bf Mon Sep 17 00:00:00 2001 From: "yunyao.zxl" Date: Wed, 29 Sep 2021 21:29:48 +0800 Subject: [PATCH 1/3] [C-Ext] Fix a potential place needing uncompressed for MacroAssembler::far_call() --- src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp index 97953cafb0e..5d6956c62c4 100644 --- a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp +++ b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp @@ -2762,7 +2762,7 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { jalr_nc(x1, tmp, offset); // link } else { if (cbuf != NULL) { cbuf->set_insts_mark(); } - jal(entry); // link + jal_nc(entry); // link } } From 70b506db563e7e950fdb72154b88a8eb2abf5d34 Mon Sep 17 00:00:00 2001 From: "yunyao.zxl" Date: Mon, 11 Oct 2021 10:55:56 +0800 Subject: [PATCH 2/3] [C-Ext] Fix trampolines' alignment --- src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp index 5d6956c62c4..ed447fc9eb8 100644 --- a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp +++ b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp @@ -3193,7 +3193,9 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // make sure 4 byte aligned here, so that the destination address would be // 8 byte aligned after 3 intructions - while (offset() % wordSize == 0) { nop(); } + // C-Ext: when we reach here we may get a 2-byte alignment and + // nop() will be 2 bytes in length. 
+ while (offset() % wordSize != 4) { nop(); } relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset)); @@ -3208,6 +3210,7 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, bind(target); assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, "should be"); + assert(offset() % wordSize == 0, "address loaded by ld must be 8-byte aligned under riscv64"); emit_int64((intptr_t)dest); const address stub_start_addr = addr_at(stub_start_offset); From ed248991f0ef7cadefca6a80fe39e146860d3698 Mon Sep 17 00:00:00 2001 From: "yunyao.zxl" Date: Thu, 14 Oct 2021 11:06:47 +0800 Subject: [PATCH 3/3] [C-Ext] Fix the cache line spanning problem for patchable CallNodes --- .../cpu/riscv64/c1_LIRAssembler_riscv64.cpp | 7 +++- src/hotspot/cpu/riscv64/riscv64.ad | 37 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp b/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp index 969f39af704..5a08e53d676 100644 --- a/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp +++ b/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp @@ -1334,7 +1334,12 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } } -void LIR_Assembler::align_call(LIR_Code code) { } +void LIR_Assembler::align_call(LIR_Code code) { + // C-Ext: With C-Ext a call may get 2-byte aligned. + // the address of jal itself (which will be patched later) should not span the cache line. + // See CallDynamicJavaDirectNode::compute_padding() for more info. 
+ __ align(4); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { address call = __ trampoline_call(Address(op->addr(), rtype)); diff --git a/src/hotspot/cpu/riscv64/riscv64.ad b/src/hotspot/cpu/riscv64/riscv64.ad index ac94e14136a..2214af91398 100644 --- a/src/hotspot/cpu/riscv64/riscv64.ad +++ b/src/hotspot/cpu/riscv64/riscv64.ad @@ -1208,6 +1208,41 @@ int MachCallNativeNode::ret_addr_offset() { return -1; } +// C-Ext: With C-Ext a call may get 2-byte aligned. +// The offset encoded in jal occupies bits [12, 31], which could span a cache line. +// Patching such an unaligned address would make the write operation non-atomic. +// Other threads may be running the same piece of code at full speed, causing concurrency issues. +// So we must ensure that it does not span a cache line so that it can be patched. +int CallStaticJavaDirectNode::compute_padding(int current_offset) const +{ + // Make sure the address of jal is 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + +// C-Ext: With C-Ext a call may get 2-byte aligned. +// The offset encoded in jal occupies bits [12, 31], which could span a cache line. +// Patching such an unaligned address would make the write operation non-atomic. +// Other threads may be running the same piece of code at full speed, causing concurrency issues. +// So we must ensure that it does not span a cache line so that it can be patched. +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const +{ + // Skip the movptr in MacroAssembler::ic_call(): + // lui + addi + slli(C) + addi + slli(C) + addi + // Though the size of movptr() is already a multiple of 4 bytes with or without C-Ext, + // we guard against future changes by calculating the size explicitly. + const int instruction_size = NativeInstruction::instruction_size; + const int compressed_instruction_size = (!UseCExt ? 
instruction_size : NativeInstruction::compressed_instruction_size); + const int movptr_size = + 2 * instruction_size + + 1 * compressed_instruction_size + + 1 * instruction_size + + 1 * compressed_instruction_size + + 1 * instruction_size; + current_offset += movptr_size; + // Make sure the address of jal is 4-byte aligned. + return align_up(current_offset, alignment_required()) - current_offset; +} + //============================================================================= #ifndef PRODUCT @@ -9760,6 +9795,7 @@ instruct CallStaticJavaDirect(method meth) riscv64_enc_call_epilog ); ins_pipe(pipe_class_call); + ins_alignment(4); %} // TO HERE @@ -9779,6 +9815,7 @@ instruct CallDynamicJavaDirect(method meth, iRegL_R6 cr) riscv64_enc_call_epilog ); ins_pipe(pipe_class_call); + ins_alignment(4); %} // Call Runtime Instruction