From 362ab57879e8e888d7bd4fe29b2b81c8472ee5ed Mon Sep 17 00:00:00 2001 From: "yunyao.zxl" Date: Mon, 8 Nov 2021 16:24:11 +0800 Subject: [PATCH] Support RVC: compressed instructions --- src/hotspot/cpu/riscv/assembler_riscv.cpp | 138 ++- src/hotspot/cpu/riscv/assembler_riscv.hpp | 281 +++--- .../cpu/riscv/assembler_riscv_cext.hpp | 865 ++++++++++++++++++ src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 8 +- .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 15 +- .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 8 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 16 +- .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 10 + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- .../riscv/c2_safepointPollStubTable_riscv.cpp | 2 +- src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 8 +- .../gc/shared/barrierSetAssembler_riscv.cpp | 30 +- .../gc/shared/barrierSetNMethod_riscv.cpp | 93 +- .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 4 +- src/hotspot/cpu/riscv/globals_riscv.hpp | 5 +- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 4 +- .../cpu/riscv/jniFastGetField_riscv.cpp | 8 +- .../cpu/riscv/macroAssembler_riscv.cpp | 248 +++-- .../cpu/riscv/macroAssembler_riscv.hpp | 31 +- src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 173 +++- src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 188 +++- src/hotspot/cpu/riscv/register_riscv.hpp | 18 +- src/hotspot/cpu/riscv/riscv.ad | 85 +- src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 46 +- src/hotspot/cpu/riscv/vm_version_riscv.cpp | 6 + src/hotspot/share/c1/c1_CodeStubs.hpp | 2 +- src/hotspot/share/c1/c1_LIRAssembler.cpp | 2 +- .../flags/jvmFlagConstraintsCompiler.cpp | 2 +- 29 files changed, 1810 insertions(+), 490 deletions(-) create mode 100644 src/hotspot/cpu/riscv/assembler_riscv_cext.hpp diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp index b4da68e3202..1bd7588b983 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -34,6 +34,7 @@ #include "memory/resourceArea.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/sharedRuntime.hpp" +#include "nativeInst_riscv.hpp" int AbstractAssembler::code_fill_byte() { return 0; @@ -80,6 +81,11 @@ void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) } void Assembler::li(Register Rd, int64_t imm) { + if (UseRVC && is_imm_in_range(imm, 6, 0) && Rd != x0) { + li_c(Rd, imm); + return; + } + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff int shift = 12; int64_t upper = imm, lower = imm; @@ -124,18 +130,18 @@ void Assembler::li64(Register Rd, int64_t imm) { lo = (lo << 52) >> 52; up -= lo; up = (int32_t)up; - lui(Rd, up); - addi(Rd, Rd, lo); + lui_nc(Rd, up); + addi_nc(Rd, Rd, lo); // Load the rest 32 bits. 
slli(Rd, Rd, 12); - addi(Rd, Rd, (int32_t)lower >> 20); + addi_nc(Rd, Rd, (int32_t)lower >> 20); slli(Rd, Rd, 12); lower = ((int32_t)imm << 12) >> 20; - addi(Rd, Rd, lower); + addi_nc(Rd, Rd, lower); slli(Rd, Rd, 8); lower = imm & 0xff; - addi(Rd, Rd, lower); + addi_nc(Rd, Rd, lower); } void Assembler::li32(Register Rd, int32_t imm) { @@ -145,40 +151,48 @@ void Assembler::li32(Register Rd, int32_t imm) { upper -= lower; upper = (int32_t)upper; // lui Rd, imm[31:12] + imm[11] - lui(Rd, upper); + lui_nc(Rd, upper); // use addiw to distinguish li32 to li64 - addiw(Rd, Rd, lower); + addiw_nc(Rd, Rd, lower); } -#define INSN(NAME, REGISTER) \ +#define INSN(NAME, REGISTER, C) \ void Assembler::NAME(const address &dest, Register temp) { \ assert_cond(dest != NULL); \ int64_t distance = dest - pc(); \ if (is_imm_in_range(distance, 20, 1)) { \ - jal(REGISTER, distance); \ + EMIT_MAY_COMPRESS_NAME(C, jal, (REGISTER, distance)); \ } else { \ assert(temp != noreg, "temp must not be empty register!"); \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ - jalr(REGISTER, temp, offset); \ + movptr_with_offset(temp, dest, offset, C); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, temp, offset)); \ } \ } \ void Assembler::NAME(Label &l, Register temp) { \ - jal(REGISTER, l, temp); \ + EMIT_MAY_COMPRESS_NAME(C, jal, (REGISTER, l, temp)); \ } \ - INSN(j, x0); - INSN(jal, x1); + INSN(j, x0, COMPRESSIBLE); + INSN(jal, x1, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(j_nc, x0, NOT_COMPRESSIBLE); + INSN(jal_nc, x1, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, REGISTER) \ +#define INSN(NAME, REGISTER, C) \ void Assembler::NAME(Register Rs) { \ - jalr(REGISTER, Rs, 0); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, Rs, 0)); \ } - INSN(jr, x0); - INSN(jalr, x1); + INSN(jr, x0, COMPRESSIBLE); + INSN(jalr, x1, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(jr_nc, x0, NOT_COMPRESSIBLE); + INSN(jalr_nc, x1, NOT_COMPRESSIBLE); #undef INSN @@ -186,32 +200,36 @@ void Assembler::ret() { jalr(x0, x1, 0); } -#define INSN(NAME, REGISTER) \ - void Assembler::NAME(const address &dest, Register temp) { \ - assert_cond(dest != NULL); \ - assert(temp != noreg, "temp must not be empty register!"); \ - int64_t distance = dest - pc(); \ - if (is_offset_in_range(distance, 32)) { \ - auipc(temp, distance + 0x800); \ - jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20); \ - } else { \ - int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ - jalr(REGISTER, temp, offset); \ - } \ +#define INSN(NAME, REGISTER, C) \ + void Assembler::NAME(const address &dest, Register temp) { \ + assert_cond(dest != NULL); \ + assert(temp != noreg, "temp must not be empty register!"); \ + int64_t distance = dest - pc(); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, distance + 0x800); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, temp, ((int32_t)distance << 20) >> 20)); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset, C); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, temp, offset)); \ + } \ } - INSN(call, x1); - INSN(tail, x0); + INSN(call, x1, COMPRESSIBLE); + INSN(tail, x0, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(call_nc, x1, NOT_COMPRESSIBLE); + INSN(tail_nc, x0, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, REGISTER) \ +#define INSN(NAME, REGISTER, NAME_NC) \ void Assembler::NAME(const Address &adr, Register temp) { \ switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - 
NAME(adr.target(), temp); \ + NAME_NC(adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -225,10 +243,14 @@ void Assembler::ret() { } \ } - INSN(j, x0); - INSN(jal, x1); - INSN(call, x1); - INSN(tail, x0); + INSN(j, x0, j_nc); + INSN(jal, x1, jal_nc); + INSN(call, x1, call_nc); + INSN(tail, x0, tail_nc); + + // C-Ext: incompressible version + INSN(j_nc, x0, j_nc); + INSN(jal_nc, x1, jal_nc); #undef INSN @@ -237,7 +259,7 @@ void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branc if (is_far) { Label done; (this->*neg_insn)(r1, r2, done, /* is_far */ false); - j(L); + j_nc(L); bind(done); } else { if (L.is_bound()) { @@ -267,7 +289,25 @@ void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { } } -void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { +void Assembler::wrap_label(Label &L, j_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } +} + +void Assembler::wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } +} + +void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset, bool compressible) { uintptr_t imm64 = (uintptr_t)addr; #ifndef PRODUCT { @@ -283,26 +323,26 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { lower = (lower << 52) >> 52; upper -= lower; upper = (int32_t)upper; - lui(Rd, upper); - addi(Rd, Rd, lower); + EMIT_MAY_COMPRESS_INST(compressible, lui, (Rd, upper)); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, lower)); // Load the rest 16 bits. slli(Rd, Rd, 11); - addi(Rd, Rd, (imm64 >> 5) & 0x7ff); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, (imm64 >> 5) & 0x7ff)); slli(Rd, Rd, 5); // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. 
offset = imm64 & 0x1f; } -void Assembler::movptr(Register Rd, uintptr_t imm64) { - movptr(Rd, (address)imm64); +void Assembler::movptr(Register Rd, uintptr_t imm64, bool compressible) { + movptr(Rd, (address)imm64, compressible); } -void Assembler::movptr(Register Rd, address addr) { +void Assembler::movptr(Register Rd, address addr, bool compressible) { int offset = 0; - movptr_with_offset(Rd, addr, offset); - addi(Rd, Rd, offset); + movptr_with_offset(Rd, addr, offset, compressible); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, offset)); } void Assembler::ifence() { diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 3564b3669b5..3da5009c7e6 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -255,6 +255,7 @@ class InternalAddress: public Address { class Assembler : public AbstractAssembler { public: +#include "assembler_riscv_cext.hpp" enum { instruction_size = 4 }; @@ -304,9 +305,9 @@ class Assembler : public AbstractAssembler { void li(Register Rd, int64_t imm); // optimized load immediate void li32(Register Rd, int32_t imm); void li64(Register Rd, int64_t imm); - void movptr(Register Rd, address addr); - void movptr_with_offset(Register Rd, address addr, int32_t &offset); - void movptr(Register Rd, uintptr_t imm64); + void movptr(Register Rd, address addr, bool COMPRESSIBLE = true); + void movptr_with_offset(Register Rd, address addr, int32_t &offset, bool COMPRESSIBLE = true); + void movptr(Register Rd, uintptr_t imm64, bool COMPRESSIBLE = true); void ifence(); void j(const address &dest, Register temp = t0); void j(const Address &adr, Register temp = t0) ; @@ -379,12 +380,21 @@ class Assembler : public AbstractAssembler { } void halt() { - emit_int32(0); + if (UseRVC) { + emit_int16(0); + } else { + emit_int32(0); + } } -// Rigster Instruction -#define INSN(NAME, op, funct3, funct7) \ +// two C-Ext macros +#define COMPRESSIBLE true +#define NOT_COMPRESSIBLE false + +// Register Instruction +#define INSN(NAME, op, funct3, funct7, C) \ void NAME(Register Rd, Register Rs1, Register Rs2) { \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs1, Rs2) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -395,37 +405,37 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(add, 0b0110011, 0b000, 0b0000000); - INSN(sub, 0b0110011, 0b000, 0b0100000); - INSN(andr, 0b0110011, 0b111, 0b0000000); - INSN(orr, 0b0110011, 0b110, 0b0000000); - INSN(xorr, 0b0110011, 0b100, 0b0000000); - INSN(sll, 0b0110011, 0b001, 0b0000000); - INSN(sra, 0b0110011, 0b101, 0b0100000); - INSN(srl, 0b0110011, 0b101, 0b0000000); - INSN(slt, 0b0110011, 0b010, 0b0000000); - INSN(sltu, 0b0110011, 0b011, 0b0000000); - INSN(addw, 0b0111011, 0b000, 0b0000000); - INSN(subw, 0b0111011, 0b000, 0b0100000); - INSN(sllw, 0b0111011, 0b001, 0b0000000); - INSN(sraw, 0b0111011, 0b101, 0b0100000); - INSN(srlw, 0b0111011, 0b101, 0b0000000); - INSN(mul, 0b0110011, 0b000, 0b0000001); - INSN(mulh, 0b0110011, 0b001, 0b0000001); - INSN(mulhsu,0b0110011, 0b010, 0b0000001); - INSN(mulhu, 0b0110011, 0b011, 0b0000001); - INSN(mulw, 0b0111011, 0b000, 0b0000001); - INSN(div, 0b0110011, 0b100, 0b0000001); - INSN(divu, 0b0110011, 0b101, 0b0000001); - INSN(divw, 0b0111011, 0b100, 0b0000001); - INSN(divuw, 0b0111011, 0b101, 0b0000001); - INSN(rem, 0b0110011, 0b110, 0b0000001); - INSN(remu, 0b0110011, 0b111, 0b0000001); - INSN(remw, 0b0111011, 0b110, 0b0000001); - INSN(remuw, 0b0111011, 
0b111, 0b0000001); + INSN(add, 0b0110011, 0b000, 0b0000000, COMPRESSIBLE); + INSN(sub, 0b0110011, 0b000, 0b0100000, COMPRESSIBLE); + INSN(andr, 0b0110011, 0b111, 0b0000000, COMPRESSIBLE); + INSN(orr, 0b0110011, 0b110, 0b0000000, COMPRESSIBLE); + INSN(xorr, 0b0110011, 0b100, 0b0000000, COMPRESSIBLE); + INSN(sll, 0b0110011, 0b001, 0b0000000, NOT_COMPRESSIBLE); + INSN(sra, 0b0110011, 0b101, 0b0100000, NOT_COMPRESSIBLE); + INSN(srl, 0b0110011, 0b101, 0b0000000, NOT_COMPRESSIBLE); + INSN(slt, 0b0110011, 0b010, 0b0000000, NOT_COMPRESSIBLE); + INSN(sltu, 0b0110011, 0b011, 0b0000000, NOT_COMPRESSIBLE); + INSN(addw, 0b0111011, 0b000, 0b0000000, COMPRESSIBLE); + INSN(subw, 0b0111011, 0b000, 0b0100000, COMPRESSIBLE); + INSN(sllw, 0b0111011, 0b001, 0b0000000, NOT_COMPRESSIBLE); + INSN(sraw, 0b0111011, 0b101, 0b0100000, NOT_COMPRESSIBLE); + INSN(srlw, 0b0111011, 0b101, 0b0000000, NOT_COMPRESSIBLE); + INSN(mul, 0b0110011, 0b000, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulh, 0b0110011, 0b001, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulhsu,0b0110011, 0b010, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulhu, 0b0110011, 0b011, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulw, 0b0111011, 0b000, 0b0000001, NOT_COMPRESSIBLE); + INSN(div, 0b0110011, 0b100, 0b0000001, NOT_COMPRESSIBLE); + INSN(divu, 0b0110011, 0b101, 0b0000001, NOT_COMPRESSIBLE); + INSN(divw, 0b0111011, 0b100, 0b0000001, NOT_COMPRESSIBLE); + INSN(divuw, 0b0111011, 0b101, 0b0000001, NOT_COMPRESSIBLE); + INSN(rem, 0b0110011, 0b110, 0b0000001, NOT_COMPRESSIBLE); + INSN(remu, 0b0110011, 0b111, 0b0000001, NOT_COMPRESSIBLE); + INSN(remw, 0b0111011, 0b110, 0b0000001, NOT_COMPRESSIBLE); + INSN(remuw, 0b0111011, 0b111, 0b0000001, NOT_COMPRESSIBLE); // Vector Configuration Instruction - INSN(vsetvl, 0b1010111, 0b111, 0b1000000); + INSN(vsetvl, 0b1010111, 0b111, 0b1000000, NOT_COMPRESSIBLE); #undef INSN @@ -437,10 +447,11 @@ class Assembler : public AbstractAssembler { code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); // Load/store register (all modes) -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, NAME_NC, C) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs, offset) \ + unsigned insn = 0; \ int32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -457,18 +468,18 @@ class Assembler : public AbstractAssembler { NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(Rd, dest, offset); \ + movptr_with_offset(Rd, dest, offset, C); \ NAME(Rd, Rd, offset); \ } \ } \ INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \ - NAME(Rd, dest); \ + NAME_NC(Rd, dest); \ } \ void NAME(Register Rd, const Address &adr, Register temp = t0) { \ switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rd, adr.target()); \ + NAME_NC(Rd, adr.target()); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -494,20 +505,24 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, &Assembler::NAME); \ } - INSN(lb, 0b0000011, 0b000); - INSN(lbu, 0b0000011, 0b100); - INSN(ld, 0b0000011, 0b011); - INSN(lh, 0b0000011, 0b001); - INSN(lhu, 0b0000011, 0b101); - INSN(lw, 0b0000011, 0b010); - INSN(lwu, 0b0000011, 0b110); + INSN(lb, 0b0000011, 0b000, lb, NOT_COMPRESSIBLE); + INSN(lbu, 0b0000011, 0b100, lbu, NOT_COMPRESSIBLE); + INSN(lh, 0b0000011, 
0b001, lh, NOT_COMPRESSIBLE); + INSN(lhu, 0b0000011, 0b101, lhu, NOT_COMPRESSIBLE); + INSN(lw, 0b0000011, 0b010, lw_nc, COMPRESSIBLE); + INSN(lwu, 0b0000011, 0b110, lwu, NOT_COMPRESSIBLE); + INSN(ld, 0b0000011, 0b011, ld_nc, COMPRESSIBLE); + // C-Ext: incompressible version + INSN(lw_nc, 0b0000011, 0b010, lw_nc, NOT_COMPRESSIBLE); + INSN(ld_nc, 0b0000011, 0b011, ld_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, NAME_NC, C) \ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs, offset) \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -524,18 +539,18 @@ class Assembler : public AbstractAssembler { NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ + movptr_with_offset(temp, dest, offset, C); \ NAME(Rd, temp, offset); \ } \ } \ INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \ - NAME(Rd, dest, temp); \ + NAME_NC(Rd, dest, temp); \ } \ void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rd, adr.target(), temp); \ + NAME_NC(Rd, adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -553,14 +568,18 @@ class Assembler : public AbstractAssembler { } \ } - INSN(flw, 0b0000111, 0b010); - INSN(fld, 0b0000111, 0b011); + INSN(flw, 0b0000111, 0b010, flw, NOT_COMPRESSIBLE); + INSN(fld, 0b0000111, 0b011, fld_nc, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(fld_nc, 0b0000111, 0b011, fld_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, NAME_NC, C) \ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rs1, Rs2, offset) \ + unsigned insn = 0; \ uint32_t val = offset & 0x1fff; \ uint32_t val11 = (val >> 11) & 0x1; \ uint32_t val12 = (val >> 12) & 0x1; \ @@ -583,15 +602,19 @@ class Assembler : public AbstractAssembler { NAME(Rs1, Rs2, offset); \ } \ INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \ - NAME(Rs1, Rs2, dest); \ + NAME_NC(Rs1, Rs2, dest); \ } - INSN(beq, 0b1100011, 0b000); - INSN(bge, 0b1100011, 0b101); - INSN(bgeu, 0b1100011, 0b111); - INSN(blt, 0b1100011, 0b100); - INSN(bltu, 0b1100011, 0b110); - INSN(bne, 0b1100011, 0b001); + INSN(beq, 0b1100011, 0b000, beq_nc, COMPRESSIBLE); + INSN(bne, 0b1100011, 0b001, bne_nc, COMPRESSIBLE); + INSN(bge, 0b1100011, 0b101, bge, NOT_COMPRESSIBLE); + INSN(bgeu, 0b1100011, 0b111, bgeu, NOT_COMPRESSIBLE); + INSN(blt, 0b1100011, 0b100, blt, NOT_COMPRESSIBLE); + INSN(bltu, 0b1100011, 0b110, bltu, NOT_COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(beq_nc, 0b1100011, 0b000, beq_nc, NOT_COMPRESSIBLE); + INSN(bne_nc, 0b1100011, 0b001, bne_nc, NOT_COMPRESSIBLE); #undef INSN @@ -607,12 +630,17 @@ class Assembler : public AbstractAssembler { INSN(bltu, bgeu); INSN(bgeu, bltu); + // C-Ext: incompressible version + INSN(beq_nc, bne_nc); + INSN(bne_nc, beq_nc); + #undef INSN -#define INSN(NAME, REGISTER, op, funct3) \ +#define INSN(NAME, REGISTER, op, funct3, 
NAME_NC, C) \ void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rs1, Rs2, offset) \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ uint32_t low = val & 0x1f; \ uint32_t high = (val >> 5) & 0x7f; \ @@ -625,19 +653,24 @@ class Assembler : public AbstractAssembler { emit(insn); \ } \ INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ - NAME(Rs, dest, temp); \ + NAME_NC(Rs, dest, temp); \ } - INSN(sb, Register, 0b0100011, 0b000); - INSN(sh, Register, 0b0100011, 0b001); - INSN(sw, Register, 0b0100011, 0b010); - INSN(sd, Register, 0b0100011, 0b011); - INSN(fsw, FloatRegister, 0b0100111, 0b010); - INSN(fsd, FloatRegister, 0b0100111, 0b011); + INSN(sb, Register, 0b0100011, 0b000, sb, NOT_COMPRESSIBLE); + INSN(sh, Register, 0b0100011, 0b001, sh, NOT_COMPRESSIBLE); + INSN(sw, Register, 0b0100011, 0b010, sw_nc, COMPRESSIBLE); + INSN(sd, Register, 0b0100011, 0b011, sd_nc, COMPRESSIBLE); + INSN(fsw, FloatRegister, 0b0100111, 0b010, fsw, NOT_COMPRESSIBLE); + INSN(fsd, FloatRegister, 0b0100111, 0b011, fsd_nc, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(sw_nc, Register, 0b0100011, 0b010, sw_nc, NOT_COMPRESSIBLE); + INSN(sd_nc, Register, 0b0100011, 0b011, sd_nc, NOT_COMPRESSIBLE); + INSN(fsd_nc, FloatRegister, 0b0100111, 0b011, fsd_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME) \ +#define INSN(NAME, NAME_NC, C) \ void NAME(Register Rs, address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ assert_different_registers(Rs, temp); \ @@ -647,7 +680,7 @@ class Assembler : public AbstractAssembler { NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ + movptr_with_offset(temp, dest, offset, C); \ NAME(Rs, temp, offset); \ } \ } \ @@ -656,7 +689,7 @@ class Assembler : public AbstractAssembler { case Address::literal: { \ assert_different_registers(Rs, temp); \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rs, adr.target(), temp); \ + NAME_NC(Rs, adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -675,14 +708,18 @@ class Assembler : public AbstractAssembler { } \ } - INSN(sb); - INSN(sh); - INSN(sw); - INSN(sd); + INSN(sb, sb, NOT_COMPRESSIBLE); + INSN(sh, sh, NOT_COMPRESSIBLE); + INSN(sw, sw_nc, COMPRESSIBLE); + INSN(sd, sd_nc, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(sw_nc, sw_nc, NOT_COMPRESSIBLE); + INSN(sd_nc, sd_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME) \ +#define INSN(NAME, NAME_NC, C) \ void NAME(FloatRegister Rs, address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ int64_t distance = (dest - pc()); \ @@ -691,7 +728,7 @@ class Assembler : public AbstractAssembler { NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ + movptr_with_offset(temp, dest, offset, C); \ NAME(Rs, temp, offset); \ } \ } \ @@ -699,7 +736,7 @@ class Assembler : public AbstractAssembler { switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rs, adr.target(), temp); \ + NAME_NC(Rs, adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -717,8 +754,11 @@ class Assembler : public AbstractAssembler { } \ } - INSN(fsw); - INSN(fsd); + INSN(fsw, fsw, NOT_COMPRESSIBLE); + INSN(fsd, fsd_nc, COMPRESSIBLE); + + // C-Ext: 
incompressible version + INSN(fsd_nc, fsd_nc, NOT_COMPRESSIBLE); #undef INSN @@ -760,10 +800,11 @@ class Assembler : public AbstractAssembler { #undef INSN -#define INSN(NAME, op) \ +#define INSN(NAME, op, C) \ void NAME(Register Rd, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, offset) \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -780,8 +821,8 @@ class Assembler : public AbstractAssembler { } else { \ assert_different_registers(Rd, temp); \ int32_t off = 0; \ - movptr_with_offset(temp, dest, off); \ - jalr(Rd, temp, off); \ + movptr_with_offset(temp, dest, off, C); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (Rd, temp, off)); \ } \ } \ void NAME(Register Rd, Label &L, Register temp = t0) { \ @@ -789,16 +830,20 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, temp, &Assembler::NAME); \ } - INSN(jal, 0b1101111); + INSN(jal, 0b1101111, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(jal_nc, 0b1101111, NOT_COMPRESSIBLE); #undef INSN #undef INSN_ENTRY_RELOC -#define INSN(NAME, op, funct) \ +#define INSN(NAME, op, funct, C) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs, offset) \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 14, 12, funct); \ @@ -808,7 +853,10 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(jalr, 0b1100111, 0b000); + INSN(jalr, 0b1100111, 0b000, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(jalr_nc, 0b1100111, 0b000, NOT_COMPRESSIBLE); #undef INSN @@ -831,8 +879,9 @@ class Assembler : public AbstractAssembler { emit(insn); } -#define INSN(NAME, op, funct3, funct7) \ +#define INSN(NAME, op, funct3, funct7, C) \ void NAME() { \ + EMIT_MAY_COMPRESS(C, NAME) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 11, 7, 0b00000); \ @@ -842,9 +891,9 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(fence_i, 0b0001111, 0b001, 0b000000000000); - INSN(ecall, 0b1110011, 0b000, 0b000000000000); - INSN(ebreak, 0b1110011, 0b000, 0b000000000001); + INSN(fence_i, 0b0001111, 0b001, 0b000000000000, NOT_COMPRESSIBLE); + INSN(ecall, 0b1110011, 0b000, 0b000000000000, NOT_COMPRESSIBLE); + INSN(ebreak, 0b1110011, 0b000, 0b000000000001, COMPRESSIBLE); #undef INSN enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -940,9 +989,10 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN // Immediate Instruction -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, C) \ void NAME(Register Rd, Register Rs1, int32_t imm) { \ guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs1, imm) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -952,12 +1002,16 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(addi, 0b0010011, 0b000); - INSN(slti, 0b0010011, 0b010); - INSN(addiw, 0b0011011, 0b000); - INSN(and_imm12, 0b0010011, 0b111); - INSN(ori, 0b0010011, 0b110); - INSN(xori, 0b0010011, 0b100); + INSN(addi, 0b0010011, 0b000, COMPRESSIBLE); + INSN(slti, 0b0010011, 0b010, NOT_COMPRESSIBLE); + INSN(addiw, 
0b0011011, 0b000, COMPRESSIBLE); + INSN(and_imm12, 0b0010011, 0b111, COMPRESSIBLE); + INSN(ori, 0b0010011, 0b110, NOT_COMPRESSIBLE); + INSN(xori, 0b0010011, 0b100, NOT_COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(addi_nc, 0b0010011, 0b000, NOT_COMPRESSIBLE); + INSN(addiw_nc, 0b0011011, 0b000, NOT_COMPRESSIBLE); #undef INSN @@ -978,9 +1032,10 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN // Shift Immediate Instruction -#define INSN(NAME, op, funct3, funct6) \ +#define INSN(NAME, op, funct3, funct6, C) \ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs1, shamt) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -991,9 +1046,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(slli, 0b0010011, 0b001, 0b000000); - INSN(srai, 0b0010011, 0b101, 0b010000); - INSN(srli, 0b0010011, 0b101, 0b000000); + INSN(slli, 0b0010011, 0b001, 0b000000, COMPRESSIBLE); + INSN(srai, 0b0010011, 0b101, 0b010000, COMPRESSIBLE); + INSN(srli, 0b0010011, 0b101, 0b000000, COMPRESSIBLE); #undef INSN @@ -1018,8 +1073,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN // Upper Immediate Instruction -#define INSN(NAME, op) \ +#define INSN(NAME, op, C) \ void NAME(Register Rd, int32_t imm) { \ + EMIT_MAY_COMPRESS(C, NAME, Rd, imm) \ int32_t upperImm = imm >> 12; \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ @@ -1029,8 +1085,11 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(lui, 0b0110111); - INSN(auipc, 0b0010111); + INSN(lui, 0b0110111, COMPRESSIBLE); + INSN(auipc, 0b0010111, NOT_COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(lui_nc, 0b0110111, NOT_COMPRESSIBLE); #undef INSN @@ -1913,6 +1972,4 @@ enum Nf { }; -class BiasedLockingCounters; - #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv_cext.hpp b/src/hotspot/cpu/riscv/assembler_riscv_cext.hpp new file mode 100644 index 00000000000..79cf860a4c0 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv_cext.hpp @@ -0,0 +1,865 @@ +/* + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2021, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_CEXT_HPP
+#define CPU_RISCV_ASSEMBLER_RISCV_CEXT_HPP
+
+  // C-Ext: If an instruction is compressible, the Assembler will implicitly emit
+  // the 16-bit compressed instruction instead of the 32-bit one. All of the logic
+  // below follows the chapter '"C" Standard Extension for Compressed Instructions,
+  // Version 2.0' of the RISC-V ISA specification. The smaller instruction size and
+  // the higher code density give both a code size reduction and a performance
+  // improvement.
+
+  // Note:
+  // 1. When UseRVC is enabled, compressible instructions are implicitly replaced
+  //    with their 16-bit versions.
+  // 2. C-Ext instructions in the Assembler always end with a '_c' suffix, such as
+  //    'li_c', but most of the time there is no need to use them explicitly.
+  //    (Although the spec says 'c.li', we use 'li_c' to unify related names - see below.)
+  // 3. In some cases we must force the uncompressed version of an instruction, for
+  //    instance when the code will be patched later and therefore has to keep its
+  //    longest, most general form, or when the code requires a fixed length.
+  //    For that we introduce the '_nc' suffix (short for: not compressible), which
+  //    forces an instruction to keep its normal 4-byte version.
+  //    An example:
+  //      j() (32-bit) may become j_c() (16-bit) with -XX:+UseRVC if it is compressible.
+  //      j_nc() forces it to keep its normal 4-byte version.
+  // 4. Running with -XX:PrintAssemblyOptions=no-aliases prints the C-Ext instructions
+  //    instead of the normal ones.
+  //
+
+  // C-Ext: incompressible version
+  void j_nc(const address &dest, Register temp = t0);
+  void j_nc(const Address &adr, Register temp = t0) ;
+  void j_nc(Label &l, Register temp = t0);
+  void jal_nc(Label &l, Register temp = t0);
+  void jal_nc(const address &dest, Register temp = t0);
+  void jal_nc(const Address &adr, Register temp = t0);
+  void jr_nc(Register Rs);
+  void jalr_nc(Register Rs);
+  void call_nc(const address &dest, Register temp = t0);
+  void tail_nc(const address &dest, Register temp = t0);
+
+  // C-Ext: extract a 16-bit instruction.
+  static inline uint16_t extract_c(uint16_t val, unsigned msb, unsigned lsb) {
+    assert_cond(msb >= lsb && msb <= 15);
+    unsigned nbits = msb - lsb + 1;
+    uint16_t mask = (1U << nbits) - 1;
+    uint16_t result = val >> lsb;
+    result &= mask;
+    return result;
+  }
+
+  static inline int16_t sextract_c(uint16_t val, unsigned msb, unsigned lsb) {
+    assert_cond(msb >= lsb && msb <= 15);
+    int16_t result = val << (15 - msb);
+    result >>= (15 - msb + lsb);
+    return result;
+  }
+
+  // C-Ext: patch a 16-bit instruction.
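+  // For illustration: li_c(x10, 5) ('c.li a0, 5') is assembled by patching op = 0b01
+  // into bits [1:0], imm[4:0] into bits [6:2], rd into bits [11:7], imm[5] into bit 12
+  // and funct3 = 0b010 into bits [15:13] of a zero-initialized 16-bit buffer via the
+  // helpers below, and then emitting the result with emit_int16().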
+ static void patch_c(address a, unsigned msb, unsigned lsb, uint16_t val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 15); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + uint16_t mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + uint16_t target = *(uint16_t *)a; + target &= ~mask; + target |= val; + *(uint16_t *)a = target; + } + + static void patch_c(address a, unsigned bit, uint16_t val) { + patch_c(a, bit, bit, val); + } + + // C-Ext: patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) + static void patch_reg_c(address a, unsigned lsb, Register reg) { + patch_c(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // C-Ext: patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) + static void patch_compressed_reg_c(address a, unsigned lsb, Register reg) { + patch_c(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + + // C-Ext: patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) + static void patch_reg_c(address a, unsigned lsb, FloatRegister reg) { + patch_c(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // C-Ext: patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) + static void patch_compressed_reg_c(address a, unsigned lsb, FloatRegister reg) { + patch_c(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + +public: + +// C-Ext: Compressed Instructions + +// -------------- C-Ext Instruction Definitions -------------- + + void nop_c() { + addi_c(x0, 0); + } + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + patch_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(addi_c, 0b000, 0b01); + INSN(addiw_c, 0b001, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 10, 0)); \ + assert_cond((imm & 0b1111) == 0); \ + assert_cond(imm != 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ + patch_c((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ + patch_c((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ + patch_reg_c((address)&insn, 7, sp); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(addi16sp_c, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(uimm != 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rd); \ + patch_c((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ + patch_c((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + patch_c((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ + patch_c((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(addi4spn_c, 0b000, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, 
uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + patch_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(slli_c, 0b000, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + patch_compressed_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 11, 10, funct2); \ + patch_c((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(srli_c, 0b100, 0b00, 0b01); + INSN(srai_c, 0b100, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + patch_compressed_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 11, 10, funct2); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(andi_c, 0b100, 0b10, 0b01); + +#undef INSN + +#define INSN(NAME, funct6, funct2, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rs2); \ + patch_c((address)&insn, 6, 5, funct2); \ + patch_compressed_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 15, 10, funct6); \ + emit_int16(insn); \ + } + + INSN(sub_c, 0b100011, 0b00, 0b01); + INSN(xor_c, 0b100011, 0b01, 0b01); + INSN(or_c, 0b100011, 0b10, 0b01); + INSN(and_c, 0b100011, 0b11, 0b01); + INSN(subw_c, 0b100111, 0b00, 0b01); + INSN(addw_c, 0b100111, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, Rs2); \ + patch_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(mv_c, 0b1000, 0b10); + INSN(add_c, 0b1001, 0b10); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rs1) { \ + assert_cond(Rs1 != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, x0); \ + patch_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(jr_c, 0b1000, 0b10); + INSN(jalr_c, 0b1001, 0b10); + +#undef INSN + + typedef void (Assembler::* j_c_insn)(address dest); + typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); + + void wrap_label(Label &L, j_c_insn insn); + void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn); + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t offset) { \ + assert_cond(is_imm_in_range(offset, 11, 1)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); 
\ + patch_c((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ + patch_c((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ + patch_c((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ + patch_c((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ + patch_c((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ + patch_c((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ + patch_c((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 11, 1)); \ + j_c(distance); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } + + INSN(j_c, 0b101, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 8, 1)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ + patch_c((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ + patch_compressed_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(Register Rs1, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 8, 1)); \ + NAME(Rs1, distance); \ + } \ + void NAME(Register Rs1, Label &L) { \ + wrap_label(L, Rs1, &Assembler::NAME); \ + } + + INSN(beqz_c, 0b110, 0b01); + INSN(bnez_c, 0b111, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 18, 0)); \ + assert_cond((imm & 0xfff) == 0); \ + assert_cond(imm != 0); \ + assert_cond(Rd != x0 && Rd != x2); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(lui_c, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(li_c, 0b010, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE, CHECK) \ + void NAME(REGISTER_TYPE Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + IF(CHECK, assert_cond(Rd != x0);) \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + patch_c((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + +#define IF(BOOL, ...) 
IF_##BOOL(__VA_ARGS__) +#define IF_true(code) code +#define IF_false(code) + + INSN(ldsp_c, 0b011, 0b10, Register, true); + INSN(fldsp_c, 0b001, 0b10, FloatRegister, false); + +#undef IF_false +#undef IF_true +#undef IF +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rd_Rs2); \ + patch_c((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ + patch_compressed_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(ld_c, 0b011, 0b00, Register); + INSN(sd_c, 0b111, 0b00, Register); + INSN(fld_c, 0b001, 0b00, FloatRegister); + INSN(fsd_c, 0b101, 0b00, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, Rs2); \ + patch_c((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ + patch_c((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(sdsp_c, 0b111, 0b10, Register); + INSN(fsdsp_c, 0b101, 0b10, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, Rs2); \ + patch_c((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ + patch_c((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(swsp_c, 0b110, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ + patch_c((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(lwsp_c, 0b010, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rd_Rs2); \ + patch_c((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ + patch_c((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + patch_compressed_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(lw_c, 0b010, 0b00); + INSN(sw_c, 0b110, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME() { \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + 
patch_c((address)&insn, 11, 2, 0x0); \ + patch_c((address)&insn, 12, 12, 0b1); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(ebreak_c, 0b100, 0b10); + +#undef INSN + +// -------------- C-Ext Transformation Macros -------------- + +// a pivotal dispatcher for C-Ext +#define EMIT_MAY_COMPRESS(COMPRESSIBLE, NAME, ...) EMIT_MAY_COMPRESS_##COMPRESSIBLE(NAME, __VA_ARGS__) +#define EMIT_MAY_COMPRESS_true(NAME, ...) EMIT_MAY_COMPRESS_##NAME(__VA_ARGS__) +#define EMIT_MAY_COMPRESS_false(NAME, ...) + +#define IS_COMPRESSIBLE(...) if (__VA_ARGS__) +#define CHECK_CEXT_AND_COMPRESSIBLE(...) IS_COMPRESSIBLE(UseRVC && __VA_ARGS__) +#define CHECK_CEXT() if (UseRVC) + +// C-Ext transformation macros +#define EMIT_RVC_cond(PREFIX, COND, EMIT) { \ + PREFIX \ + CHECK_CEXT_AND_COMPRESSIBLE(COND) { \ + EMIT; \ + return; \ + } \ + } + +#define EMIT_RVC_cond2(PREFIX, COND1, EMIT1, COND2, EMIT2) { \ + PREFIX \ + CHECK_CEXT() { \ + IS_COMPRESSIBLE(COND1) { \ + EMIT1; \ + return; \ + } else IS_COMPRESSIBLE(COND2) { \ + EMIT2; \ + return; \ + } \ + } \ + } + +#define EMIT_RVC_cond4(PREFIX, COND1, EMIT1, COND2, EMIT2, COND3, EMIT3, COND4, EMIT4) { \ + PREFIX \ + CHECK_CEXT() { \ + IS_COMPRESSIBLE(COND1) { \ + EMIT1; \ + return; \ + } else IS_COMPRESSIBLE(COND2) { \ + EMIT2; \ + return; \ + } else IS_COMPRESSIBLE(COND3) { \ + EMIT3; \ + return; \ + } else IS_COMPRESSIBLE(COND4) { \ + EMIT4; \ + return; \ + } \ + } \ + } + +// -------------------------- +// Register instructions +// -------------------------- +// add -> c.add +#define EMIT_MAY_COMPRESS_add(Rd, Rs1, Rs2) \ + EMIT_RVC_cond( \ + Register src = noreg;, \ + Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd)), \ + add_c(Rd, src) \ + ) + +// -------------------------- +// sub/subw -> c.sub/c.subw +#define EMIT_MAY_COMPRESS_sub_helper(NAME_C, Rd, Rs1, Rs2) \ + EMIT_RVC_cond(, \ + Rs1 == Rd && Rd->is_compressed_valid() && Rs2->is_compressed_valid(), \ + NAME_C(Rd, Rs2) \ + ) + +#define EMIT_MAY_COMPRESS_sub(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_sub_helper(sub_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_subw(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_sub_helper(subw_c, Rd, Rs1, Rs2) + +// -------------------------- +// xor/or/and/addw -> c.xor/c.or/c.and/c.addw +#define EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(NAME_C, Rd, Rs1, Rs2) \ + EMIT_RVC_cond( \ + Register src = noreg;, \ + Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ + ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd)), \ + NAME_C(Rd, src) \ + ) + +#define EMIT_MAY_COMPRESS_xorr(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(xor_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_orr(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(or_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_andr(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(and_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_addw(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(addw_c, Rd, Rs1, Rs2) + +// -------------------------- +// Load/store register (all modes) +// -------------------------- +private: + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0 && \ + (!ld || rd_rs2 != x0); \ + } \ + + FUNC(is_ldsdsp_c, 0b111, 9); + FUNC(is_lwswsp_c, 0b011, 8); +#undef FUNC + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, int32_t imm12) { \ + return rs1 == sp 
&& \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_fldsdsp_c, 0b111, 9); +#undef FUNC + +#define FUNC(NAME, REG_TYPE, funct3, bits) \ + bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ + return rs1->is_compressed_valid() && \ + rd_rs2->is_compressed_valid() && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_ldsd_c, Register, 0b111, 8); + FUNC(is_lwsw_c, Register, 0b011, 7); + FUNC(is_fldsd_c, FloatRegister, 0b111, 8); +#undef FUNC + +public: +// -------------------------- +// ld -> c.ldsp/c.ld +#define EMIT_MAY_COMPRESS_ld(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_ldsdsp_c(Rs, Rd, offset, true), \ + ldsp_c(Rd, offset), \ + is_ldsd_c(Rs, Rd, offset), \ + ld_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// sd -> c.sdsp/c.sd +#define EMIT_MAY_COMPRESS_sd(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_ldsdsp_c(Rs, Rd, offset, false), \ + sdsp_c(Rd, offset), \ + is_ldsd_c(Rs, Rd, offset), \ + sd_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// lw -> c.lwsp/c.lw +#define EMIT_MAY_COMPRESS_lw(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_lwswsp_c(Rs, Rd, offset, true), \ + lwsp_c(Rd, offset), \ + is_lwsw_c(Rs, Rd, offset), \ + lw_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// sw -> c.swsp/c.sw +#define EMIT_MAY_COMPRESS_sw(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_lwswsp_c(Rs, Rd, offset, false), \ + swsp_c(Rd, offset), \ + is_lwsw_c(Rs, Rd, offset), \ + sw_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// fld -> c.fldsp/c.fld +#define EMIT_MAY_COMPRESS_fld(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_fldsdsp_c(Rs, offset), \ + fldsp_c(Rd, offset), \ + is_fldsd_c(Rs, Rd, offset), \ + fld_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// fsd -> c.fsdsp/c.fsd +#define EMIT_MAY_COMPRESS_fsd(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_fldsdsp_c(Rs, offset), \ + fsdsp_c(Rd, offset), \ + is_fldsd_c(Rs, Rd, offset), \ + fsd_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// Conditional branch instructions +// -------------------------- +// beq/bne -> c.beqz/c.bnez + +// TODO: Removing the below 'offset != 0' check needs us to fix lots of '__ beqz() / __ benz()' +// to '__ beqz_nc() / __ bnez_nc()' everywhere. +#define EMIT_MAY_COMPRESS_beqz_bnez_helper(NAME_C, Rs1, Rs2, offset) \ + EMIT_RVC_cond(, \ + offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ + is_imm_in_range(offset, 8, 1), \ + NAME_C(Rs1, offset) \ + ) + +#define EMIT_MAY_COMPRESS_beq(Rs1, Rs2, offset) \ + EMIT_MAY_COMPRESS_beqz_bnez_helper(beqz_c, Rs1, Rs2, offset) + +#define EMIT_MAY_COMPRESS_bne(Rs1, Rs2, offset) \ + EMIT_MAY_COMPRESS_beqz_bnez_helper(bnez_c, Rs1, Rs2, offset) + +// -------------------------- +// Unconditional branch instructions +// -------------------------- +// jalr/jal -> c.jr/c.jalr/c.j + +#define EMIT_MAY_COMPRESS_jalr(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + offset == 0 && Rd == x1 && Rs != x0, \ + jalr_c(Rs), \ + offset == 0 && Rd == x0 && Rs != x0, \ + jr_c(Rs) \ + ) + +// TODO: Removing the 'offset != 0' check needs us to fix lots of '__ j()' +// to '__ j_nc()' manually everywhere. 
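+// For illustration: under UseRVC, jal(x0, offset) - the plain 'j' - with a non-zero
+// offset within +-2KiB is emitted as the 2-byte c.j; jal with Rd == x1 always keeps
+// its 4-byte encoding here, since RV64C does not provide c.jal.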
+#define EMIT_MAY_COMPRESS_jal(Rd, offset) \ + EMIT_RVC_cond(, \ + offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1), \ + j_c(offset) \ + ) + +// -------------------------- +// Upper Immediate Instruction +// -------------------------- +// lui -> c.lui +#define EMIT_MAY_COMPRESS_lui(Rd, imm) \ + EMIT_RVC_cond(, \ + Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0), \ + lui_c(Rd, imm) \ + ) + +// -------------------------- +// Miscellaneous Instructions +// -------------------------- +// ebreak -> c.ebreak +#define EMIT_MAY_COMPRESS_ebreak() \ + EMIT_RVC_cond(, \ + true, \ + ebreak_c() \ + ) + +// -------------------------- +// Immediate Instructions +// -------------------------- +// addi -> c.addi16sp/c.addi4spn/c.mv/c.addi/. An addi instruction able to transform to c.nop will be ignored. +#define EMIT_MAY_COMPRESS_addi(Rd, Rs1, imm) \ + EMIT_RVC_cond4(, \ + Rs1 == sp && Rd == Rs1 && imm != 0 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0), \ + addi16sp_c(imm), \ + Rs1 == sp && Rd->is_compressed_valid() && imm != 0 && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0), \ + addi4spn_c(Rd, imm), \ + Rd == Rs1 && is_imm_in_range(imm, 6, 0), \ + if (imm != 0) { addi_c(Rd, imm); }, \ + imm == 0 && Rd != x0 && Rs1 != x0, \ + mv_c(Rd, Rs1) \ + ) + +// -------------------------- +// addiw -> c.addiw +#define EMIT_MAY_COMPRESS_addiw(Rd, Rs1, imm) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0), \ + addiw_c(Rd, imm) \ + ) + +// -------------------------- +// and_imm12 -> c.andi +#define EMIT_MAY_COMPRESS_and_imm12(Rd, Rs1, imm) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0), \ + andi_c(Rd, imm) \ + ) + +// -------------------------- +// Shift Immediate Instructions +// -------------------------- +// slli -> c.slli +#define EMIT_MAY_COMPRESS_slli(Rd, Rs1, shamt) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd != x0 && shamt != 0, \ + slli_c(Rd, shamt) \ + ) + +// -------------------------- +// srai/srli -> c.srai/c.srli +#define EMIT_MAY_COMPRESS_srai_srli_helper(NAME_C, Rd, Rs1, shamt) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0, \ + NAME_C(Rd, shamt) \ + ) + +#define EMIT_MAY_COMPRESS_srai(Rd, Rs1, shamt) \ + EMIT_MAY_COMPRESS_srai_srli_helper(srai_c, Rd, Rs1, shamt) + +#define EMIT_MAY_COMPRESS_srli(Rd, Rs1, shamt) \ + EMIT_MAY_COMPRESS_srai_srli_helper(srli_c, Rd, Rs1, shamt) + +// -------------------------- + +// a compile time dispatcher +#define EMIT_MAY_COMPRESS_NAME_true(NAME, ARGS) NAME ARGS +#define EMIT_MAY_COMPRESS_NAME_false(NAME, ARGS) NAME##_nc ARGS +#define EMIT_MAY_COMPRESS_NAME(COMPRESSIBLE, NAME, ARGS) EMIT_MAY_COMPRESS_NAME_##COMPRESSIBLE(NAME, ARGS) + +// a runtime dispatcher (if clause is needed) +#define EMIT_MAY_COMPRESS_INST(COMPRESSIBLE, NAME, ARGS) \ + if (COMPRESSIBLE) { \ + EMIT_MAY_COMPRESS_NAME_true(NAME, ARGS); \ + } else { \ + EMIT_MAY_COMPRESS_NAME_false(NAME, ARGS); \ + } + +#endif // CPU_RISCV_ASSEMBLER_RISCV_CEXT_HPP \ No newline at end of file diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index 687381c0897..7b96e358517 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -44,7 +44,7 @@ void C1SafepointPollStub::emit_code(LIR_Assembler* ce) __ bind(_entry); InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); - __ la(t0, 
safepoint_pc.target()); + __ la(t0, safepoint_pc.target(), NOT_COMPRESSIBLE); __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); assert(SharedRuntime::polling_page_return_handler_blob() != NULL, @@ -106,9 +106,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) __ mv(t1, _array->as_pointer_register()); stub_id = Runtime1::throw_range_check_failed_id; } - int32_t off = 0; - __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off); - __ jalr(ra, ra, off); + __ jalr_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), ra); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); @@ -257,7 +255,7 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); } -int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; +int PatchingStub::_patch_info_offset = -NativeGeneralJump::get_instruction_size(); void PatchingStub::align_patch_site(MacroAssembler* masm) {} diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index 14ec4a5f995..f3d132889dc 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -1318,7 +1318,12 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } } -void LIR_Assembler::align_call(LIR_Code code) { } +void LIR_Assembler::align_call(LIR_Code code) { + // C-Ext: With C-Ext a call may get 2-byte aligned. + // the address of jal itself (which will be patched later) should not span the cache line. + // See CallDynamicJavaDirectNode::compute_padding() for more info. + __ align(4); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -1375,9 +1380,7 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit } int pc_for_athrow_offset = __ offset(); InternalAddress pc_for_athrow(__ pc()); - int32_t off = 0; - __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off); - __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off); + __ addi_patchable(exceptionPC->as_register(), pc_for_athrow, exceptionPC->as_register()); add_call_info(pc_for_athrow_offset, info); // for exception handler __ verify_not_null_oop(x10); @@ -1801,9 +1804,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg if (cb != NULL) { __ far_call(RuntimeAddress(dest)); } else { - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(dest), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(dest), t0); } if (info != NULL) { diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 02c54ee959c..0ef83964d48 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -325,7 +325,7 @@ void C1_MacroAssembler::verified_entry() { // must ensure that this first instruction is a J, JAL or NOP. // Make it a NOP. 
- nop(); + nop_nc(); } void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp index b3b405f3040..ff4fd393e2e 100644 --- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -67,9 +67,7 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres set_last_Java_frame(sp, fp, retaddr, t0); // do the call - int32_t off = 0; - la_patchable(t0, RuntimeAddress(entry), off); - jalr(x1, t0, off); + jalr_patchable(x1, RuntimeAddress(entry), t0); bind(retaddr); int call_offset = offset(); // verify callee-saved register @@ -569,9 +567,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { Label retaddr; __ set_last_Java_frame(sp, fp, retaddr, t0); // do the call - int32_t off = 0; - __ la_patchable(t0, RuntimeAddress(target), off); - __ jalr(x1, t0, off); + __ jalr_patchable(x1, RuntimeAddress(target), t0); __ bind(retaddr); OopMapSet* oop_maps = new OopMapSet(); assert_cond(oop_maps != NULL); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index ff030372712..203556b2644 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -1196,21 +1196,21 @@ typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, static conditional_branch_insn conditional_branches[] = { /* SHORT branches */ - (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::beq_nc, (conditional_branch_insn)&Assembler::bgt, NULL, // BoolTest::overflow (conditional_branch_insn)&Assembler::blt, - (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bne_nc, (conditional_branch_insn)&Assembler::ble, NULL, // BoolTest::no_overflow (conditional_branch_insn)&Assembler::bge, /* UNSIGNED branches */ - (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::beq_nc, (conditional_branch_insn)&Assembler::bgtu, NULL, (conditional_branch_insn)&Assembler::bltu, - (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bne_nc, (conditional_branch_insn)&Assembler::bleu, NULL, (conditional_branch_insn)&Assembler::bgeu @@ -1259,11 +1259,11 @@ void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, switch (cmpFlag) { case BoolTest::eq: case BoolTest::le: - beqz(op1, L, is_far); + beqz_nc(op1, L, is_far); break; case BoolTest::ne: case BoolTest::gt: - bnez(op1, L, is_far); + bnez_nc(op1, L, is_far); break; default: ShouldNotReachHere(); @@ -1273,10 +1273,10 @@ void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { switch (cmpFlag) { case BoolTest::eq: - beqz(op1, L, is_far); + beqz_nc(op1, L, is_far); break; case BoolTest::ne: - bnez(op1, L, is_far); + bnez_nc(op1, L, is_far); break; default: ShouldNotReachHere(); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index ef854dc2fce..fd376d32824 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -82,6 +82,16 @@ static const int double_branch_mask = 1 << bool_test_bits; // cmp + // C-Ext: these cmp functions remain uncompressed in C2 MachNodes' emission - + // 
for the reason described in MachEpilogNode::emit() and PhaseOutput::scratch_emit_size():
+  //   scratch_emit_size() simulates a node's size, but for MachBranchNodes it emits a fake
+  //   Label right next to the node itself - the offset is so small that in the scratch
+  //   emission phase the branch always gets compressed by our implicit compression, while
+  //   in the real code the Label may be anywhere, so the branch may stay uncompressed.
+  //   That is the mismatch: C2 runs shorten_branches(), but with C-Ext we would also need
+  //   a further pass, say shorten_compressed_branches(). After researching we found little
+  //   performance gain from compressing MachBranchNodes, while the cost of supporting their
+  //   compression is considerable, so as a solution we simply disable it for MachBranchNodes.
   void cmp_branch(int cmpFlag,
                   Register op1, Register op2,
                   Label& label, bool is_far = false);
diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
index 0b01a47bd5c..4a86b964c2f 100644
--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -46,7 +46,7 @@ define_pd_global(intx, OnStackReplacePercentage, 140);
 define_pd_global(intx, ConditionalMoveLimit, 0);
 define_pd_global(intx, FreqInlineSize, 325);
 define_pd_global(intx, MinJumpTableSize, 10);
-define_pd_global(intx, InteriorEntryAlignment, 16);
+define_pd_global(intx, InteriorEntryAlignment, 4);
 define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
 define_pd_global(intx, LoopUnrollLimit, 60);
 define_pd_global(intx, LoopPercentProfileLimit, 10);
diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
index 8956e4b7941..6469f6fe6cf 100644
--- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
@@ -41,7 +41,7 @@ void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointP
   __ bind(entry->_stub_label);
   InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
   masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec());
-  __ la(t0, safepoint_pc.target());
+  __ la(t0, safepoint_pc.target(), NOT_COMPRESSIBLE);
   __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));
   __ far_jump(callback_addr);
 }
diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
index 75bc4be7840..72215c14071 100644
--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -69,8 +69,12 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark)
 #undef __
 
 int CompiledStaticCall::to_interp_stub_size() {
-  // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
-  return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size;
+  // fence_i + fence* + (lui, addi, slli(C), addi, slli(C), addi) + (lui, addi, slli(C), addi, slli(C)) + jalr
+  return NativeFenceI::instruction_size() +
+         (!UseRVC ?
+ 12 * NativeInstruction::instruction_size : + 8 * NativeInstruction::instruction_size + 4 * NativeInstruction::compressed_instruction_size + ); } int CompiledStaticCall::to_trampoline_stub_size() { diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index f23ff34e3f4..7049b720d3e 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -180,20 +180,12 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, // Get the current end of the heap ExternalAddress address_end((address) Universe::heap()->end_addr()); - { - int32_t offset; - __ la_patchable(t1, address_end, offset); - __ ld(t1, Address(t1, offset)); - } + __ ld_patchable(t1, address_end, t1); // Get the current top of the heap ExternalAddress address_top((address) Universe::heap()->top_addr()); - { - int32_t offset; - __ la_patchable(t0, address_top, offset); - __ addi(t0, t0, offset); - __ lr_d(obj, t0, Assembler::aqrl); - } + __ addi_patchable(t0, address_top, t0); + __ lr_d(obj, t0, Assembler::aqrl); // Adjust it my the size of our new object if (var_size_in_bytes == noreg) { @@ -231,6 +223,8 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); } +extern int nmethod_barrier_guard_offset(); + void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); @@ -238,6 +232,12 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { return; } + // C-Ext: With C-Ext we may come here with a 2-byte alignment, hence an alignment is needed. + // See below comments about amo, also native_nmethod_barrier() to find the entry's calculation strategy. + while ((__ offset() + nmethod_barrier_guard_offset()) % 4 != 0) { __ nop(); } + + int start = __ offset(); + Label skip, guard; Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); @@ -250,10 +250,14 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { __ beq(t0, t1, skip); int32_t offset = 0; - __ movptr_with_offset(t0, StubRoutines::riscv64::method_entry_barrier(), offset); - __ jalr(ra, t0, offset); + __ movptr_with_offset(t0, StubRoutines::riscv64::method_entry_barrier(), offset, NOT_COMPRESSIBLE); + __ jalr_nc(ra, t0, offset); __ j(skip); + // RISCV's amoswap instructions need an alignment for the memory address it swaps + // C-Ext: So with C-Ext we need to manually align it to 4-byte + assert(__ offset() - start == nmethod_barrier_guard_offset() && __ offset() % 4 == 0, "offsets equality and alignment"); + __ bind(guard); __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
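For reference, a minimal stand-alone sketch of the guard-offset arithmetic the two files above rely on; the constants mirror NativeInstruction::instruction_size (4) and compressed_instruction_size (2) introduced in this patch, while the helper name and the main() harness are illustrative only:

#include <cassert>

// 4-byte RVI instruction vs. 2-byte RVC instruction, as defined in nativeInst_riscv.hpp.
constexpr int kInsnSize  = 4;
constexpr int kCInsnSize = 2;

// Barrier body: auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j.
// With RVC the two slli are emitted as c.slli, so the guard word sits 4 bytes closer.
constexpr int guard_offset(bool use_rvc) {
  return use_rvc ? 10 * kInsnSize + 2 * kCInsnSize   // 44 bytes
                 : 12 * kInsnSize;                   // 48 bytes
}

int main() {
  // nmethod_entry_barrier() pads with nops until (offset + guard_offset) % 4 == 0,
  // which keeps the 32-bit guard word 4-byte aligned for the amoswap-based disarming.
  assert(guard_offset(false) == 48);
  assert(guard_offset(true)  == 44);
  return 0;
}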
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp index ae7ee4c5a44..4f276ef633d 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp @@ -36,11 +36,21 @@ #include "utilities/debug.hpp" class NativeNMethodBarrier: public NativeInstruction { +public: + enum { + total_normal_guard_offset = 12 * instruction_size, + total_compressed_guard_offset = 10 * instruction_size + 2 * compressed_instruction_size, + + total_normal_size = total_normal_guard_offset + 4, + total_compressed_size = total_compressed_guard_offset + 4, + }; + +private: address instruction_address() const { return addr_at(0); } int *guard_addr() { - /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ - return reinterpret_cast(instruction_address() + 12 * 4); + /* auipc + lwu + fence + lwu + beq + lui + addi + (C)slli + addi + (C)slli + jalr + j */ + return reinterpret_cast(instruction_address() + guard_offset()); } public: @@ -53,28 +63,55 @@ class NativeNMethodBarrier: public NativeInstruction { } void verify() const; + + static int guard_offset() { + return UseRVC ? total_compressed_guard_offset : total_normal_guard_offset; + } }; +int nmethod_barrier_guard_offset() { + return NativeNMethodBarrier::guard_offset(); +} + // Store the instruction bitmask, bits and name for checking the barrier. struct CheckInsn { uint32_t mask; uint32_t bits; const char *name; + int instruction_size; }; static const struct CheckInsn barrierInsn[] = { - { 0x00000fff, 0x00000297, "auipc t0, 0 "}, - { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, - { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, - { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, - { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, - { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, - { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, - { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, - { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, - { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, - { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, - { 0x00000fff, 0x0000006f, "j skip "} + { 0x00000fff, 0x00000297, "auipc t0, 0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x0aa0000f, "fence ir, ir ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x000be303, "lwu t1, 36(xthread) ", NativeInstruction::instruction_size}, + { 0x01fff07f, 0x00628063, "beq t0, t1, skip ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x000002b7, "lui t0, imm0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm1 ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x00b29293, "slli t0, t0, 11 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm2 ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x00529293, "slli t0, t0, 5 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x0000006f, "j skip ", NativeInstruction::instruction_size} + /* guard: */ + /* 32bit nmethod guard value */ + /* skip: */ +}; + +static const struct CheckInsn barrierCInsn[] = { + { 0x00000fff, 0x00000297, "auipc t0, 0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x0002e283, "lwu t0, 44(t0) ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x0aa0000f, "fence ir, ir ", 
NativeInstruction::instruction_size}, + { 0x000fffff, 0x000be303, "lwu t1, 36(xthread) ", NativeInstruction::instruction_size}, + { 0x01fff07f, 0x00628063, "beq t0, t1, skip ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x000002b7, "lui t0, imm0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm1 ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x02ae, "c.slli t0, t0, 11 ", NativeInstruction::compressed_instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm2 ", NativeInstruction::instruction_size}, + { 0x0000ffff, 0x0296, "c.slli t0, t0, 5 ", NativeInstruction::compressed_instruction_size}, + { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x0000006f, "j skip ", NativeInstruction::instruction_size} /* guard: */ /* 32bit nmethod guard value */ /* skip: */ @@ -85,13 +122,22 @@ static const struct CheckInsn barrierInsn[] = { // register numbers and immediate values in the encoding. void NativeNMethodBarrier::verify() const { intptr_t addr = (intptr_t) instruction_address(); - for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { - uint32_t inst = *((uint32_t*) addr); - if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { + const struct CheckInsn *insns; + size_t size; + if (!UseRVC) { + insns = barrierInsn; + size = sizeof(barrierInsn) / sizeof(struct CheckInsn); + } else { + insns = barrierCInsn; + size = sizeof(barrierCInsn) / sizeof(struct CheckInsn); + } + for(unsigned int i = 0; i < size; i++ ) { + uint32_t inst = insns[i].instruction_size == NativeInstruction::compressed_instruction_size ? *((uint16_t*) addr) : *((uint32_t*) addr); + if ((inst & insns[i].mask) != insns[i].bits) { tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); - fatal("not an %s instruction.", barrierInsn[i].name); + fatal("not an %s instruction.", insns[i].name); } - addr += 4; + addr += insns[i].instruction_size; } } @@ -141,10 +187,15 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { // see BarrierSetAssembler::nmethod_entry_barrier // auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 -static const int entry_barrier_offset = -4 * 13; +static const int entry_barrier_normal_offset = -NativeNMethodBarrier::total_normal_size; +static const int entry_barrier_compressed_offset = -NativeNMethodBarrier::total_compressed_size; + +static const int entry_barrier_offset() { + return !UseRVC ? 
entry_barrier_normal_offset : entry_barrier_compressed_offset; +} static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { - address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; + address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(); NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); debug_only(barrier->verify()); return barrier; diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp index a1d4be63f61..4ce8013644f 100644 --- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -336,9 +336,7 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z { ZSaveLiveRegisters save_live_registers(masm, stub); ZSetupArguments setup_arguments(masm, stub); - int32_t offset = 0; - __ la_patchable(t0, stub->slow_path(), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, stub->slow_path(), t0); } // Stub exit diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index 0f48c46b409..b0896aea0a6 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -37,7 +37,7 @@ define_pd_global(bool, TrapBasedNullChecks, false); define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, CodeEntryAlignment, 16); define_pd_global(intx, OptoLoopAlignment, 16); #define DEFAULT_STACK_YELLOW_PAGES (2) @@ -90,6 +90,7 @@ define_pd_global(intx, InlineSmallCode, 1000); "Extend fence.i to fence.i + fence.") \ product(bool, AvoidUnalignedAccesses, true, \ "Avoid generating unaligned memory accesses") \ - product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") + product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ + product(bool, UseRVC, true, EXPERIMENTAL, "Use RVC instructions") \ #endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index 549d56eb94e..88a6961b2e3 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -182,9 +182,7 @@ void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, i } void InterpreterMacroAssembler::get_dispatch() { - int32_t offset = 0; - la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); - addi(xdispatch, xdispatch, offset); + addi_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), xdispatch); } void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp index 814ed23e471..55b186f55d6 100644 --- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -74,9 +74,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { address fast_entry = __ pc(); Label slow; - int32_t offset = 0; - __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); - __ addi(rcounter_addr, rcounter_addr, offset); + __ 
addi_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), rcounter_addr); Address safepoint_counter_addr(rcounter_addr, 0); __ lwu(rcounter, safepoint_counter_addr); @@ -169,9 +167,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { { __ enter(); - int32_t tmp_offset = 0; - __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); - __ jalr(x1, t0, tmp_offset); + __ jalr_patchable(x1, ExternalAddress(slow_case_addr), t0); __ leave(); __ ret(); } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index c9dcae99747..16cfc7cd23b 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -225,10 +225,11 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, - Register temp) { + Register temp, + bool compressible) { assert(last_java_pc != NULL, "must provide a valid PC"); - la(temp, last_java_pc); + la(temp, last_java_pc, compressible); sd(temp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); set_last_Java_frame(last_java_sp, last_java_fp, noreg, temp); @@ -243,7 +244,7 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, } else { InstructionMark im(this); L.add_patch_at(code(), locator()); - set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp, NOT_COMPRESSIBLE); } } @@ -308,9 +309,7 @@ void MacroAssembler::call_VM_base(Register oop_result, ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); Label ok; beqz(t0, ok); - int32_t offset = 0; - la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); - jalr(x0, t0, offset); + jalr_patchable(x0, RuntimeAddress(StubRoutines::forward_exception_entry()), t0); bind(ok); } @@ -384,9 +383,7 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { } // call indirectly to solve generation ordering problem - int32_t offset = 0; - la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); - ld(t1, Address(t1, offset)); + ld_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), t1); jalr(t1); pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); @@ -423,9 +420,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } // call indirectly to solve generation ordering problem - int32_t offset = 0; - la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); - ld(t1, Address(t1, offset)); + ld_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), t1); jalr(t1); pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); @@ -535,12 +530,18 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t bind(done); } -void MacroAssembler::stop(const char* msg) { +// C-Ext: we may need to disable the compression for some instructions +// in some Nodes during C2 code emission, to emit the same constant +// instruction size both in PhaseOutput::scratch_emit_size() +// and the final real code emission. +// See: MachEpilogNode::emit() for more details. 
+void MacroAssembler::stop(const char* msg, bool compressible) { address ip = pc(); pusha(); if(msg != NULL && ip != NULL) { li(c_rarg0, (uintptr_t)(address)msg); - li(c_rarg1, (uintptr_t)(address)ip); + // C-Ext: use a fixed-length movptr + movptr(c_rarg1, (address)ip, compressible); } else { ShouldNotReachHere(); } @@ -571,8 +572,8 @@ void MacroAssembler::emit_static_call_stub() { // Jump to the entry point of the i2c stub. int32_t offset = 0; - movptr_with_offset(t0, 0, offset); - jalr(x0, t0, offset); + movptr_with_offset(t0, 0, offset, NOT_COMPRESSIBLE); + jalr_nc(x0, t0, offset); } void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments, @@ -658,7 +659,15 @@ void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Reg } void MacroAssembler::nop() { - addi(x0, x0, 0); + if (UseRVC) { + nop_c(); + } else { + addi(x0, x0, 0); + } +} + +void MacroAssembler::nop_nc() { + addi_nc(x0, x0, 0); } void MacroAssembler::mv(Register Rd, Register Rs) { @@ -739,13 +748,13 @@ void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { vfsgnjn_vv(vd, vs, vs); } -void MacroAssembler::la(Register Rd, const address &dest) { +void MacroAssembler::la(Register Rd, const address &dest, bool compressible) { int64_t offset = dest - pc(); if (is_offset_in_range(offset, 32)) { auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit - addi(Rd, Rd, ((int64_t)offset << 52) >> 52); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, ((int64_t)offset << 52) >> 52)); } else { - movptr(Rd, dest); + movptr(Rd, dest, compressible); } } @@ -759,7 +768,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { if (rtype == relocInfo::none) { li(Rd, (intptr_t)(adr.target())); } else { - movptr(Rd, adr.target()); + movptr(Rd, adr.target(), NOT_COMPRESSIBLE); } break; } @@ -775,7 +784,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { } void MacroAssembler::la(Register Rd, Label &label) { - la(Rd, target(label)); + la(Rd, target(label), NOT_COMPRESSIBLE); } #define INSN(NAME) \ @@ -795,6 +804,19 @@ void MacroAssembler::la(Register Rd, Label &label) { #undef INSN +#define INSN(NAME) \ + void MacroAssembler::NAME##z_nc(Register Rs, const address &dest) { \ + NAME##_nc(Rs, zr, dest); \ + } \ + void MacroAssembler::NAME##z_nc(Register Rs, Label &l, bool is_far) { \ + NAME##_nc(Rs, zr, l, is_far); \ + } \ + + INSN(beq); + INSN(bne); + +#undef INSN + // Float compare branch instructions #define INSN(NAME, FLOATCMP, BRANCH) \ @@ -807,8 +829,9 @@ void MacroAssembler::la(Register Rd, Label &label) { BRANCH(t0, l, is_far); \ } - INSN(beq, feq, bnez); - INSN(bne, feq, beqz); + INSN(beq, feq, bnez_nc); + INSN(bne, feq, beqz_nc); + #undef INSN @@ -818,11 +841,11 @@ void MacroAssembler::la(Register Rd, Label &label) { if(is_unordered) { \ /* jump if either source is NaN or condition is expected */ \ FLOATCMP2##_s(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ + beqz_nc(t0, l, is_far); \ } else { \ /* jump if no NaN in source and condition is expected */ \ FLOATCMP1##_s(t0, Rs1, Rs2); \ - bnez(t0, l, is_far); \ + bnez_nc(t0, l, is_far); \ } \ } \ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ @@ -830,11 +853,11 @@ void MacroAssembler::la(Register Rd, Label &label) { if(is_unordered) { \ /* jump if either source is NaN or condition is expected */ \ FLOATCMP2##_d(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ + beqz_nc(t0, l, is_far); \ } else { \ /* jump if no NaN in source and condition is expected */ \ FLOATCMP1##_d(t0, 
Rs1, Rs2);                                                     \
-      bnez(t0, l, is_far);                                                             \
+      bnez_nc(t0, l, is_far);                                                          \
     }                                                                                  \
   }
 
@@ -1186,21 +1209,46 @@ void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_byte
 }
 
 static int patch_offset_in_jal(address branch, int64_t offset) {
-  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n");
-  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
-  Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff);                      // offset[10:1]  ==> branch[30:21]
-  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
-  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
-  return NativeInstruction::instruction_size;                                   // only one instruction
+  if (!NativeInstruction::is_compressed_instr(branch)) {
+    assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
+    Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                      // offset[20]    ==> branch[31]
+    Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
+    Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                      // offset[11]    ==> branch[20]
+    Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                     // offset[19:12] ==> branch[19:12]
+    return NativeInstruction::instruction_size;                                  // only one instruction
+  } else { // no need to check again that this is a compressed instruction - it must be, since we have to patch it.
+    assert(is_imm_in_range(offset, 11, 1), "offset is too large to be patched in one c.j instruction: use j_nc() instead of j().\n");
+    Assembler::patch_c(branch, 2, 2, (offset & nth_bit(5)) >> 5);                // offset[5]    ==> branch[2]
+    Assembler::patch_c(branch, 5, 3, (offset & right_n_bits(4)) >> 1);           // offset[3:1]  ==> branch[5:3]
+    Assembler::patch_c(branch, 6, 6, (offset & nth_bit(7)) >> 7);                // offset[7]    ==> branch[6]
+    Assembler::patch_c(branch, 7, 7, (offset & nth_bit(6)) >> 6);                // offset[6]    ==> branch[7]
+    Assembler::patch_c(branch, 8, 8, (offset & nth_bit(10)) >> 10);              // offset[10]   ==> branch[8]
+    Assembler::patch_c(branch, 10, 9, (offset & right_n_bits(10)) >> 8);         // offset[9:8]  ==> branch[10:9]
+    Assembler::patch_c(branch, 11, 11, (offset & nth_bit(4)) >> 4);              // offset[4]    ==> branch[11]
+    Assembler::patch_c(branch, 12, 12, (offset & nth_bit(11)) >> 11);            // offset[11]   ==> branch[12]
+    return NativeInstruction::compressed_instruction_size;                       // only one instruction
+  }
 }
 
 static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
-  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n");
-  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
-  Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f);                       // offset[10:5]  ==> branch[30:25]
-  Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1);                         // offset[11]    ==> branch[7]
-  Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf);                         // offset[4:1]   ==> branch[11:8]
-  return NativeInstruction::instruction_size;                                   // only one instruction
+  if (!NativeInstruction::is_compressed_instr(branch)) {
+    assert(is_imm_in_range(offset, 12, 1),
+           "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
+    Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                      // offset[12]    ==> branch[31]
+    Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f);                      // offset[10:5]  ==> branch[30:25]
+    Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1);                        // offset[11]    ==> branch[7]
+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] + return NativeInstruction::instruction_size; // only one instruction + } else { + assert(is_imm_in_range(offset, 8, 1), + "offset is too large to be patched in one c.beqz/c.bnez instruction: use beqz_nc()/bnez.nc() instead.\n"); + Assembler::patch_c(branch, 2, 2, (offset & nth_bit(5)) >> 5); + Assembler::patch_c(branch, 4, 3, (offset & right_n_bits(3)) >> 1); + Assembler::patch_c(branch, 6, 5, (offset & right_n_bits(8)) >> 6); + Assembler::patch_c(branch, 11, 10, (offset & right_n_bits(5)) >> 3); + Assembler::patch_c(branch, 12, 12, (offset & nth_bit(8)) >> 8); + return NativeInstruction::compressed_instruction_size; // only one instruction + } } static int patch_offset_in_pc_relative(address branch, int64_t offset) { @@ -1211,18 +1259,24 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { } static int patch_addr_in_movptr(address branch, address target) { - const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load + // lui + addi + slli(C) + addi + slli(C) + addi/jalr/load + const int size = !UseRVC ? + 6 * NativeInstruction::instruction_size : + 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size; int32_t lower = ((intptr_t)target << 36) >> 36; int64_t upper = ((intptr_t)target - lower) >> 28; Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] - Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] - Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] - return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + Assembler::patch(branch + (!UseRVC ? 12 : 10), 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] + Assembler::patch(branch + (!UseRVC ? 20 : 16), 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] + return size; } static int patch_imm_in_li64(address branch, address target) { - const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi + // lui + addi + slli(C) + addi + slli(C) + addi + slli(C) + addi + const int size = !UseRVC ? + 8 * NativeInstruction::instruction_size : + 5 * NativeInstruction::instruction_size + 3 * NativeInstruction::compressed_instruction_size; int64_t lower = (intptr_t)target & 0xffffffff; lower = lower - ((lower << 44) >> 44); int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; @@ -1236,10 +1290,10 @@ static int patch_imm_in_li64(address branch, address target) { Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. // Load the rest 32 bits. - Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. - Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. - Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. - return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + Assembler::patch(branch + (!UseRVC ? 12 : 10), 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. + Assembler::patch(branch + (!UseRVC ? 20 : 16), 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. + Assembler::patch(branch + (!UseRVC ? 
28 : 22), 31, 20, (intptr_t)target & 0xff); // Addi. + return size; } static int patch_imm_in_li32(address branch, int32_t target) { @@ -1253,10 +1307,8 @@ static int patch_imm_in_li32(address branch, int32_t target) { return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; } -static long get_offset_of_jal(address insn_addr) { - assert_cond(insn_addr != NULL); +static long get_offset_of_jal(unsigned insn) { long offset = 0; - unsigned insn = *(unsigned*)insn_addr; long val = (long)Assembler::sextract(insn, 31, 12); offset |= ((val >> 19) & 0x1) << 20; offset |= (val & 0xff) << 12; @@ -1266,14 +1318,12 @@ static long get_offset_of_jal(address insn_addr) { return offset; } -static long get_offset_of_conditional_branch(address insn_addr) { +static long get_offset_of_conditional_branch(unsigned insn) { long offset = 0; - assert_cond(insn_addr != NULL); - unsigned insn = *(unsigned*)insn_addr; offset = (long)Assembler::sextract(insn, 31, 31); - offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); - offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); - offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); + offset = (offset << 12) | ((Assembler::sextract(insn, 7, 7) & 0x1) << 11); + offset = offset | ((Assembler::sextract(insn, 30, 25) & 0x3f) << 5); + offset = offset | ((Assembler::sextract(insn, 11, 8) & 0xf) << 1); offset = (offset << 41) >> 41; return offset; } @@ -1290,9 +1340,9 @@ static long get_offset_of_pc_relative(address insn_addr) { static address get_target_of_movptr(address insn_addr) { assert_cond(insn_addr != NULL); intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 2)))[3], 31, 20)) << 5; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 4)))[5], 31, 20)); // Addi/Jalr/Load. return (address) target_address; } @@ -1300,9 +1350,9 @@ static address get_target_of_li64(address insn_addr) { assert_cond(insn_addr != NULL); intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 2)))[3], 31, 20)) << 20; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 4)))[5], 31, 20)) << 8; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 6)))[7], 31, 20)); // Addi. 
return (address)target_address; } @@ -1342,9 +1392,9 @@ address MacroAssembler::target_addr_for_insn(address insn_addr) { long offset = 0; assert_cond(insn_addr != NULL); if (NativeInstruction::is_jal_at(insn_addr)) { // jal - offset = get_offset_of_jal(insn_addr); + offset = get_offset_of_jal(*(unsigned*)insn_addr); } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne - offset = get_offset_of_conditional_branch(insn_addr); + offset = get_offset_of_conditional_branch(*(unsigned*)insn_addr); } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load offset = get_offset_of_pc_relative(insn_addr); } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr @@ -1381,8 +1431,7 @@ void MacroAssembler::reinit_heapbase() { mv(xheapbase, CompressedOops::ptrs_base()); } else { int32_t offset = 0; - la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); - ld(xheapbase, Address(xheapbase, offset)); + ld_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), xheapbase); } } } @@ -1402,7 +1451,7 @@ void MacroAssembler::mvw(Register Rd, int32_t imm32) { void MacroAssembler::mv(Register Rd, Address dest) { assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); code_section()->relocate(pc(), dest.rspec()); - movptr(Rd, dest.target()); + movptr(Rd, dest.target(), NOT_COMPRESSIBLE); } void MacroAssembler::mv(Register Rd, address addr) { @@ -2483,10 +2532,10 @@ void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { // the code cache cannot exceed 2Gb. la_patchable(tmp, entry, offset); if (cbuf != NULL) { cbuf->set_insts_mark(); } - jalr(x0, tmp, offset); + jalr_nc(x0, tmp, offset); } else { if (cbuf != NULL) { cbuf->set_insts_mark(); } - j(entry); + j_nc(entry); } } @@ -2500,10 +2549,10 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { // the code cache cannot exceed 2Gb. 
la_patchable(tmp, entry, offset); if (cbuf != NULL) { cbuf->set_insts_mark(); } - jalr(x1, tmp, offset); // link + jalr_nc(x1, tmp, offset); // link } else { if (cbuf != NULL) { cbuf->set_insts_mark(); } - jal(entry); // link + jal_nc(entry); // link } } @@ -2762,10 +2811,31 @@ void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &o auipc(reg1, (int32_t)distance + 0x800); offset = ((int32_t)distance << 20) >> 20; } else { - movptr_with_offset(reg1, dest.target(), offset); + movptr_with_offset(reg1, dest.target(), offset, NOT_COMPRESSIBLE); } } +void MacroAssembler::ld_patchable(Register Rd, const Address &dest, Register tmp) { + int offset = 0; + la_patchable(tmp, dest, offset); + // C-Ext: use uncompressed instructions to match pd_patch_instruction_size() + ld_nc(Rd, tmp, offset); +} + +void MacroAssembler::addi_patchable(Register Rd, const Address &dest, Register tmp) { + int offset = 0; + la_patchable(tmp, dest, offset); + // C-Ext: use uncompressed instructions to match pd_patch_instruction_size() + addi_nc(Rd, tmp, offset); +} + +void MacroAssembler::jalr_patchable(Register Rd, const Address &dest, Register tmp) { + int offset = 0; + la_patchable(tmp, dest, offset); + // C-Ext: use uncompressed instructions to match pd_patch_instruction_size() + jalr_nc(x1, tmp, offset); +} + void MacroAssembler::build_frame(int framesize) { assert(framesize >= 2, "framesize must include space for FP/RA"); assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); @@ -2784,7 +2854,7 @@ void MacroAssembler::remove_frame(int framesize) { add(sp, sp, framesize); } -void MacroAssembler::reserved_stack_check() { +void MacroAssembler::reserved_stack_check(bool compressible) { // testing if reserved zone needs to be enabled Label no_reserved_zone_enabling; @@ -2793,18 +2863,14 @@ void MacroAssembler::reserved_stack_check() { enter(); // RA and FP are live. mv(c_rarg0, xthread); - int32_t offset = 0; - la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); - jalr(x1, t0, offset); + jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), t0); leave(); // We have already removed our own frame. // throw_delayed_StackOverflowError will think that it's been // called by our caller. 
- offset = 0; - la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); - jalr(x0, t0, offset); - should_not_reach_here(); + jalr_patchable(x0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), t0); + should_not_reach_here(compressible); bind(no_reserved_zone_enabling); } @@ -2892,9 +2958,9 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { if (cbuf != NULL) { cbuf->set_insts_mark(); } relocate(entry.rspec()); if (!far_branches()) { - jal(entry.target()); + jal_nc(entry.target()); } else { - jal(pc()); + jal_nc(pc()); } // just need to return a non-null address postcond(pc() != badAddress); @@ -2903,7 +2969,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { address MacroAssembler::ic_call(address entry, jint method_index) { RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); - movptr(t1, (address)Universe::non_oop_word()); + movptr(t1, (address)Universe::non_oop_word(), NOT_COMPRESSIBLE); assert_cond(entry != NULL); return trampoline_call(Address(entry, rh)); } @@ -2933,7 +2999,9 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // make sure 4 byte aligned here, so that the destination address would be // 8 byte aligned after 3 intructions - while (offset() % wordSize == 0) { nop(); } + // C-Ext: when we reach here we may get a 2-byte alignment and + // nop() will be 2 bytes in length. + while (offset() % wordSize != 4) { nop(); } relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset)); @@ -2943,11 +3011,12 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // - load the call // - call Label target; - ld(t0, target); // auipc + ld - jr(t0); // jalr + ld_nc(t0, target); // auipc + ld + jr_nc(t0); // jalr bind(target); assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, "should be"); + assert(offset() % wordSize == 0, "address loaded by ld must be 8-byte aligned under riscv64"); emit_int64((intptr_t)dest); const address stub_start_addr = addr_at(stub_start_offset); @@ -2989,8 +3058,7 @@ void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { assert_different_registers(src1, t0); int32_t offset; - la_patchable(t0, src2, offset); - ld(t0, Address(t0, offset)); + ld_patchable(t0, src2, t0); beq(src1, t0, equal); } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 1913638e8f6..4907445f595 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -136,7 +136,7 @@ class MacroAssembler: public Assembler { void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); // last Java Frame (fills frame anchor) - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp, bool compressible = true); void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register temp); void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc,Register temp); @@ -357,13 +357,13 @@ class MacroAssembler: public Assembler { } // prints msg, dumps registers and stops 
execution
-  void stop(const char* msg);
+  void stop(const char* msg, bool compressible = true);
 
   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 
   void unimplemented(const char* what = "");
 
-  void should_not_reach_here() { stop("should not reach here"); }
+  void should_not_reach_here(bool compressible = true) { stop("should not reach here", compressible); }
 
   static address target_addr_for_insn(address insn_addr);
 
@@ -398,6 +398,7 @@ class MacroAssembler: public Assembler {
  public:
   // Standard pseudoinstruction
   void nop();
+  void nop_nc();
   void mv(Register Rd, Register Rs) ;
   void notr(Register Rd, Register Rs);
   void neg(Register Rd, Register Rs);
@@ -444,13 +445,13 @@ class MacroAssembler: public Assembler {
   void fsflagsi(unsigned imm);
 
   void beqz(Register Rs, const address &dest);
+  void bnez(Register Rs, const address &dest);
   void blez(Register Rs, const address &dest);
   void bgez(Register Rs, const address &dest);
   void bltz(Register Rs, const address &dest);
   void bgtz(Register Rs, const address &dest);
-  void bnez(Register Rs, const address &dest);
   void la(Register Rd, Label &label);
-  void la(Register Rd, const address &dest);
+  void la(Register Rd, const address &dest, bool compressible = true);
   void la(Register Rd, const Address &adr);
   //label
   void beqz(Register Rs, Label &l, bool is_far = false);
@@ -472,6 +473,12 @@ class MacroAssembler: public Assembler {
   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 
+  // C-Ext: incompressible version
+  void beqz_nc(Register Rs, const address &dest);
+  void bnez_nc(Register Rs, const address &dest);
+  void beqz_nc(Register Rs, Label &l, bool is_far = false);
+  void bnez_nc(Register Rs, Label &l, bool is_far = false);
+
   void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
   void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
   void push_reg(Register Rs);
@@ -612,6 +619,12 @@ class MacroAssembler: public Assembler {
 
   void la_patchable(Register reg1, const Address &dest, int32_t &offset);
 
+  // Note: to prevent misuse, programmers should call these helpers
+  // instead of hand-writing the equivalent la_patchable() sequences.
+ void ld_patchable(Register Rd, const Address &dest, Register tmp); + void addi_patchable(Register Rd, const Address &dest, Register tmp); + void jalr_patchable(Register Rd, const Address &dest, Register tmp); + virtual void _call_Unimplemented(address call_site) { mv(t1, call_site); } @@ -633,7 +646,7 @@ class MacroAssembler: public Assembler { void build_frame(int framesize); void remove_frame(int framesize); - void reserved_stack_check(); + void reserved_stack_check(bool compressible = true); void get_polling_page(Register dest, relocInfo::relocType rtype); address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); @@ -788,11 +801,9 @@ class MacroAssembler: public Assembler { void ld_constant(Register dest, const Address &const_addr) { if (NearCpool) { - ld(dest, const_addr); + ld_nc(dest, const_addr); } else { - int32_t offset = 0; - la_patchable(dest, InternalAddress(const_addr.target()), offset); - ld(dest, Address(dest, offset)); + ld_patchable(dest, InternalAddress(const_addr.target()), dest); } } diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp index 977e0c74445..ebf31b1aeb2 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp @@ -39,64 +39,145 @@ #include "c1/c1_Runtime1.hpp" #endif +uint32_t NativeInstruction::extract_rs1(address instr, int &size) { + assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + size = compressed_instruction_size; + uint16_t op = Assembler::extract_c(((uint16_t*)instr)[0], 1, 0); + switch (op) { + case 0b00: { + return Assembler::extract_c(((uint16_t*)instr)[0], 9, 7); + } + case 0b01: { + if (!is_set_nth_bit(((uint16_t*)instr)[0], 15)) { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } else { + return Assembler::extract_c(((uint16_t*)instr)[0], 9, 7); + } + } + case 0b10: { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } + default: + ShouldNotReachHere(); + } + return 0; + } else { + size = instruction_size; + return Assembler::extract(((unsigned*)instr)[0], 19, 15); + } +} + +uint32_t NativeInstruction::extract_rs2(address instr, int &size) { + assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + size = compressed_instruction_size; + uint16_t op = Assembler::extract_c(((uint16_t*)instr)[0], 1, 0); + switch (op) { + case 0b00: { + return Assembler::extract_c(((uint16_t*)instr)[0], 4, 2); + } + case 0b01: { + if (!is_set_nth_bit(((uint16_t*)instr)[0], 15)) { + ShouldNotReachHere(); + return 0; + } else { + return Assembler::extract_c(((uint16_t*)instr)[0], 4, 2); + } + } + case 0b10: { + return Assembler::extract_c(((uint16_t*)instr)[0], 6, 2); + } + default: + ShouldNotReachHere(); + } + return 0; + } else { + size = instruction_size; + return Assembler::extract(((unsigned*)instr)[0], 24, 20); + } +} + +uint32_t NativeInstruction::extract_rd(address instr, int &size) { + assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + size = compressed_instruction_size; + uint16_t op = Assembler::extract_c(((uint16_t*)instr)[0], 1, 0); + switch (op) { + case 0b00: { + return Assembler::extract_c(((uint16_t*)instr)[0], 4, 2); + } + case 0b01: { + if (!is_set_nth_bit(((uint16_t*)instr)[0], 15)) { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } else { + return Assembler::extract_c(((uint16_t*)instr)[0], 9, 7); + } + } + case 0b10: { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } + default: + ShouldNotReachHere(); + } + return 0; + } 
else { + size = instruction_size; + return Assembler::extract(((unsigned*)instr)[0], 11, 7); + } +} + bool NativeInstruction::is_pc_relative_at(address instr) { // auipc + jalr // auipc + addi // auipc + load // auipc + fload_load - if ((is_auipc_at(instr)) && - (is_addi_at(instr + 4) || is_jalr_at(instr + 4) || is_load_at(instr + 4) || is_float_load_at(instr + 4)) && - check_pc_relative_data_dependency(instr)) { - return true; - } - return false; + return (is_auipc_at(instr)) && + (is_addi_at(instr + instruction_size) || + is_jalr_at(instr + instruction_size) || + is_load_at(instr + instruction_size) || + is_float_load_at(instr + instruction_size)) && + check_pc_relative_data_dependency(instr); } // ie:ld(Rd, Label) bool NativeInstruction::is_load_pc_relative_at(address instr) { - if (is_auipc_at(instr) && // auipc - is_ld_at(instr + 4) && // ld - check_load_pc_relative_data_dependency(instr)) { - return true; - } - return false; + return is_auipc_at(instr) && // auipc + is_ld_at(instr + instruction_size) && // ld + check_load_pc_relative_data_dependency(instr); } bool NativeInstruction::is_movptr_at(address instr) { - if (is_lui_at(instr) && // Lui - is_addi_at(instr + 4) && // Addi - is_slli_shift_at(instr + 8, 11) && // Slli Rd, Rs, 11 - is_addi_at(instr + 12) && // Addi - is_slli_shift_at(instr + 16, 5) && // Slli Rd, Rs, 5 - (is_addi_at(instr + 20) || is_jalr_at(instr + 20) || is_load_at(instr + 20)) && // Addi/Jalr/Load - check_movptr_data_dependency(instr)) { - return true; - } - return false; + address pos = instr; + int size = 0; + return is_lui_at(pos) && // Lui + is_addi_at(pos += instruction_size) && // Addi + is_slli_shift_at(pos += instruction_size, 11, size) && // Slli Rd, Rs, 11 + is_addi_at(pos += size) && // Addi + is_slli_shift_at(pos += instruction_size, 5, size) && // Slli Rd, Rs, 5 + (is_addi_at(pos += size) || is_jalr_at(pos) || is_load_at(pos)) && // Addi/Jalr/Load + check_movptr_data_dependency(instr); } bool NativeInstruction::is_li32_at(address instr) { - if (is_lui_at(instr) && // lui - is_addiw_at(instr + 4) && // addiw - check_li32_data_dependency(instr)) { - return true; - } - return false; + address pos = instr; + return is_lui_at(pos) && // lui + is_addiw_at(pos += instruction_size) && // addiw + check_li32_data_dependency(instr); } bool NativeInstruction::is_li64_at(address instr) { - if (is_lui_at(instr) && // lui - is_addi_at(instr + 4) && // addi - is_slli_shift_at(instr + 8, 12)&& // Slli Rd, Rs, 12 - is_addi_at(instr + 12) && // addi - is_slli_shift_at(instr + 16, 12) && // Slli Rd, Rs, 12 - is_addi_at(instr + 20) && // addi - is_slli_shift_at(instr + 24, 8) && // Slli Rd, Rs, 8 - is_addi_at(instr + 28) && // addi - check_li64_data_dependency(instr)) { - return true; - } - return false; + address pos = instr; + int size = 0; + return is_lui_at(pos) && // lui + is_addi_at(pos += instruction_size) && // addi + is_slli_shift_at(pos += instruction_size, 12, size) && // Slli Rd, Rs, 12 + is_addi_at(pos += size) && // addi + is_slli_shift_at(pos += instruction_size, 12, size) && // Slli Rd, Rs, 12 + is_addi_at(pos += size) && // addi + is_slli_shift_at(pos += instruction_size, 8, size) && // Slli Rd, Rs, 8 + is_addi_at(pos += size) && // addi + check_li64_data_dependency(instr); } void NativeCall::verify() { @@ -203,7 +284,7 @@ void NativeMovConstReg::set_data(intptr_t x) { } else { // Store x into the instruction stream. 
MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); - ICache::invalidate_range(instruction_address(), movptr_instruction_size); + ICache::invalidate_range(instruction_address(), get_movptr_instruction_size()); } // Find and replace the oop/metadata corresponding to this @@ -341,7 +422,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); - assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || + assert(nativeInstruction_at(verified_entry)->is_jump_or_nop_nc() || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), "riscv64 cannot replace non-jump with jump"); @@ -371,14 +452,14 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; - CodeBuffer cb(code_pos, instruction_size); + CodeBuffer cb(code_pos, get_instruction_size()); MacroAssembler a(&cb); int32_t offset = 0; - a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli - a.jalr(x0, t0, offset); // jalr + a.movptr_with_offset(t0, entry, offset, NOT_COMPRESSIBLE); // lui, addi, slli, addi, slli + a.jalr_nc(x0, t0, offset); // jalr - ICache::invalidate_range(code_pos, instruction_size); + ICache::invalidate_range(code_pos, get_instruction_size()); } // MT-safe patching of a long jump instruction. diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index 117d58e8e28..dd06afc813e 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -53,7 +53,8 @@ class NativeInstruction { friend bool is_NativeCallTrampolineStub_at(address); public: enum { - instruction_size = 4 + instruction_size = 4, + compressed_instruction_size = 2, }; juint encoding() const { @@ -65,26 +66,49 @@ class NativeInstruction { bool is_call() const { return is_call_at(addr_at(0)); } bool is_jump() const { return is_jump_at(addr_at(0)); } + static bool is_compressed_instr(address instr) { + if ((((unsigned*)instr)[0] & 0b11) == 0b11) { + return false; + } + assert((((uint16_t *)instr)[0] & 0b11) != 0b11, "seems instr is not an illegal instruction beginning: 0x%x", ((unsigned*)instr)[0]); + return true; + } + static int instr_size(address instr) { + return is_compressed_instr(instr) ? 
compressed_instruction_size : instruction_size; + } static bool is_jal_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1101111; } static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100111 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } static bool is_branch_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100011; } static bool is_ld_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b011); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b011); } static bool is_load_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011; } static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000111; } static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010111; } static bool is_jump_at(address instr) { assert_cond(instr != NULL); return (is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr)); } static bool is_addi_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0011011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } static bool is_lui_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0110111; } - static bool is_slli_shift_at(address instr, uint32_t shift) { + static bool is_slli_shift_at(address instr, uint32_t shift) { int size = 0; return is_slli_shift_at(instr, shift, size); } + static uint16_t extract_slli_c(address instr) { + uint16_t low5 = Assembler::extract_c(((uint16_t*)instr)[0], 6, 2); + uint16_t high1 = Assembler::extract_c(((uint16_t*)instr)[0], 12, 12); + return (high1 << 5 | low5); + } + static bool is_slli_shift_at(address instr, uint32_t shift, int &size) { assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + return Assembler::extract_c(((uint16_t*)instr)[0], 15, 13) == 0b000 && + Assembler::extract_c(((uint16_t*)instr)[0], 1, 0) == 0b10 && + extract_slli_c(instr) == shift && + (size = compressed_instruction_size); + } return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && // opcode field Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b001 && // funct3 field, select the type of operation - Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift) && // shamt field + (size = instruction_size); } // return true if the (index1~index2) field of instr1 is equal to (index3~index4) field of instr2, otherwise false @@ -93,6 +117,13 @@ class NativeInstruction { return Assembler::extract(((unsigned*)instr1)[0], index1, index2) == Assembler::extract(((unsigned*)instr2)[0], index3, 
index4); } + static uint32_t extract_rs1(address instr) { int size = 0; return extract_rs1(instr, size); } + static uint32_t extract_rs2(address instr) { int size = 0; return extract_rs2(instr, size); } + static uint32_t extract_rd(address instr) { int size = 0; return extract_rd(instr, size); } + static uint32_t extract_rs1(address instr, int &size); + static uint32_t extract_rs2(address instr, int &size); + static uint32_t extract_rd(address instr, int &size); + // the instruction sequence of movptr is as below: // lui // addi @@ -101,15 +132,21 @@ class NativeInstruction { // slli // addi/jalr/load static bool check_movptr_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7); // check the rs1 field of addi/jalr/load and the rd field of slli + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instr_size(slli1); + address slli2 = addi2 + instruction_size; + address final = slli2 + instr_size(slli2); + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(final) == extract_rd(slli2); } // the instruction sequence of li64 is as below: @@ -121,44 +158,61 @@ class NativeInstruction { // addi // slli // addi - static bool check_li64_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field fof slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7) && // check the rs1 
field of addi and the rd field of slli
- compare_instr_field(instr + 20, 19, 15, instr + 20, 11, 7) && // check the rs1 field and the rd field of addi
- compare_instr_field(instr + 24, 19, 15, instr + 20, 11, 7) && // check the rs1 field of slli and the rd field of addi
- compare_instr_field(instr + 24, 19, 15, instr + 24, 11, 7) && // check the rs1 field and the rd field of slli
- compare_instr_field(instr + 28, 19, 15, instr + 24, 11, 7) && // check the rs1 field of addi and the rd field of slli
- compare_instr_field(instr + 28, 19, 15, instr + 28, 11, 7); // check the rs1 field and the rd field of addi
+ static bool check_li64_data_dependency(address instr) { // FIXME: maybe restore the original code, since only the 'slli' instructions can be compressed here.
+ address lui = instr;
+ address addi1 = lui + instruction_size;
+ address slli1 = addi1 + instruction_size;
+ address addi2 = slli1 + instr_size(slli1);
+ address slli2 = addi2 + instruction_size;
+ address addi3 = slli2 + instr_size(slli2);
+ address slli3 = addi3 + instruction_size;
+ address addi4 = slli3 + instr_size(slli3);
+ return extract_rs1(addi1) == extract_rd(lui) &&
+ extract_rs1(addi1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(slli2) &&
+ extract_rs1(addi3) == extract_rd(slli2) &&
+ extract_rs1(addi3) == extract_rd(addi3) &&
+ extract_rs1(slli3) == extract_rd(addi3) &&
+ extract_rs1(slli3) == extract_rd(slli3) &&
+ extract_rs1(addi4) == extract_rd(slli3) &&
+ extract_rs1(addi4) == extract_rd(addi4);
}
// the instruction sequence of li32 is as below:
// lui
// addiw
static bool check_li32_data_dependency(address instr) {
- return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addiw and the rd field of lui
- compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field and the rd field of addiw
+ address lui = instr;
+ address addiw = lui + instruction_size;
+
+ return extract_rs1(addiw) == extract_rd(lui) &&
+ extract_rs1(addiw) == extract_rd(addiw);
}
// the instruction sequence of pc-relative is as below:
// auipc
// jalr/addi/load/float_load
static bool check_pc_relative_data_dependency(address instr) {
- return compare_instr_field(instr, 11, 7, instr + 4, 19, 15); // check the rd field of auipc and the rs1 field of jalr/addi/load/float_load
+ address auipc = instr;
+ address final = auipc + instruction_size;
+
+ return extract_rs1(final) == extract_rd(auipc);
}
// the instruction sequence of load_label is as below:
// auipc
// load
static bool check_load_pc_relative_data_dependency(address instr) {
- return compare_instr_field(instr, 11, 7, instr + 4, 11, 7) && // check the rd field of auipc and the rd field of load
- compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field of load and the rd field of load
+ address auipc = instr;
+ address load = auipc + instruction_size;
+
+ return extract_rd(load) == extract_rd(auipc) &&
+ extract_rs1(load) == extract_rd(load);
}
static bool is_movptr_at(address instr);
@@ -168,6 +222,7 @@ class NativeInstruction {
static bool is_load_pc_relative_at(address branch);
static bool is_call_at(address instr) {
+ assert(!is_compressed_instr(instr), "we need to reserve the 4-byte instruction to handle all cases");
if (is_jal_at(instr) || is_jalr_at(instr)) {
return true;
}
@@ -176,9 +231,11
@@ class NativeInstruction { static bool is_lwu_to_zr(address instr); inline bool is_nop(); + inline bool is_compressed_nop(); + inline bool is_uncompressed_nop(); inline bool is_illegal(); inline bool is_return(); - inline bool is_jump_or_nop(); + inline bool is_jump_or_nop_nc(); inline bool is_cond_jump(); bool is_safepoint_poll(); bool is_sigill_zombie_not_entrant(); @@ -189,6 +246,7 @@ class NativeInstruction { jint int_at(int offset) const { return *(jint*) addr_at(offset); } juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + jushort uint16_at(int offset) const { return *(jushort *) addr_at(offset); } address ptr_at(int offset) const { return *(address*) addr_at(offset); } @@ -318,13 +376,23 @@ inline NativeCall* nativeCall_before(address return_address) { class NativeMovConstReg: public NativeInstruction { public: enum RISCV64_specific_constants { - movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + compressed_movptr_instruction_size = 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size, // lui, addi, slli(C), addi, slli(C), addi. See movptr(). movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). + compressed_movptr_with_offset_instruction_size = 3 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size, // lui, addi, slli(C), addi, slli(C). See movptr_with_offset(). load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld instruction_offset = 0, displacement_offset = 0 }; + static const int get_movptr_with_offset_instruction_size() { + return !UseRVC ? movptr_with_offset_instruction_size : compressed_movptr_with_offset_instruction_size; + } + + static const int get_movptr_instruction_size() { + return !UseRVC ? 
movptr_instruction_size : compressed_movptr_instruction_size; + } + address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { // if the instruction at 5 * instruction_size is addi, @@ -333,12 +401,12 @@ class NativeMovConstReg: public NativeInstruction { // However, when the instruction at 5 * instruction_size isn't addi, // the next instruction address should be addr_at(5 * instruction_size) if (nativeInstruction_at(instruction_address())->is_movptr()) { - if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { + if (is_addi_at(addr_at(get_movptr_with_offset_instruction_size()))) { // Assume: lui, addi, slli, addi, slli, addi - return addr_at(movptr_instruction_size); + return addr_at(get_movptr_instruction_size()); } else { // Assume: lui, addi, slli, addi, slli - return addr_at(movptr_with_offset_instruction_size); + return addr_at(get_movptr_with_offset_instruction_size()); } } else if (is_load_pc_relative_at(instruction_address())) { // Assume: auipc, ld @@ -353,7 +421,7 @@ class NativeMovConstReg: public NativeInstruction { void flush() { if (!maybe_cpool_ref(instruction_address())) { - ICache::invalidate_range(instruction_address(), movptr_instruction_size); + ICache::invalidate_range(instruction_address(), get_movptr_instruction_size()); } } @@ -422,10 +490,10 @@ inline NativeMovRegMem* nativeMovRegMem_at (address addr) { class NativeJump: public NativeInstruction { public: enum RISCV64_specific_constants { - instruction_size = 4, + instruction_size = NativeInstruction::instruction_size, instruction_offset = 0, data_offset = 0, - next_instruction_offset = 4 + next_instruction_offset = NativeInstruction::instruction_size }; address instruction_address() const { return addr_at(instruction_offset); } @@ -456,12 +524,18 @@ inline NativeJump* nativeJump_at(address addr) { class NativeGeneralJump: public NativeJump { public: enum RISCV64_specific_constants { - instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + compressed_instruction_size = 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size, // lui, addi, slli(C), addi, slli(C), jalr instruction_offset = 0, data_offset = 0, - next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr + normal_next_instruction_offset = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + compressed_next_instruction_offset = 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size // lui, addi, slli(C), addi, slli(C), jalr }; + static const int get_instruction_size() { + return !UseRVC ? 
instruction_size : compressed_instruction_size;
+ }
+
address jump_destination() const;
static void insert_unconditional(address code_pos, address entry);
@@ -481,13 +555,27 @@ class NativeIllegalInstruction: public NativeInstruction {
static void insert(address code_pos);
};
-inline bool NativeInstruction::is_nop() {
- uint32_t insn = *(uint32_t*)addr_at(0);
+inline bool NativeInstruction::is_nop() {
+ return is_compressed_nop() || is_uncompressed_nop();
+}
+
+inline bool NativeInstruction::is_compressed_nop() {
+ address instr_addr = addr_at(0);
+ if (is_compressed_instr(instr_addr)) {
+ uint16_t insn = *(uint16_t*)instr_addr;
+ return insn == 0x1;
+ }
+ return false;
+}
+
+inline bool NativeInstruction::is_uncompressed_nop() {
+ address instr_addr = addr_at(0);
+ uint32_t insn = *(uint32_t*)instr_addr;
return insn == 0x13;
}
-inline bool NativeInstruction::is_jump_or_nop() {
- return is_nop() || is_jump();
+inline bool NativeInstruction::is_jump_or_nop_nc() {
+ return is_uncompressed_nop() || is_jump();
}
// Call trampoline stubs.
diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp
index e3203a5f032..99c65b13b8a 100644
--- a/src/hotspot/cpu/riscv/register_riscv.hpp
+++ b/src/hotspot/cpu/riscv/register_riscv.hpp
@@ -58,7 +58,11 @@ class RegisterImpl: public AbstractRegisterImpl {
enum {
number_of_registers = 32,
number_of_byte_registers = 32,
- max_slots_per_register = 2
+ max_slots_per_register = 2,
+
+ // C-Ext: integer registers in the range [x8, x15] are the ones addressable by the 3-bit register fields of RVC instructions. Please see Table 16.2 in the spec.
+ compressed_register_base = 8,
+ compressed_register_top = 15,
};
// derived registers, offsets, and addresses
@@ -71,10 +75,13 @@ class RegisterImpl: public AbstractRegisterImpl {
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int compressed_encoding() const { assert(is_compressed_valid(), "invalid compressed register"); return ((intptr_t)this - compressed_register_base); }
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+ bool is_compressed_valid() const { return compressed_register_base <= (intptr_t)this && (intptr_t)this <= compressed_register_top; }
bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; }
const char* name() const;
int encoding_nocheck() const { return (intptr_t)this; }
+ int compressed_encoding_nocheck() const { return ((intptr_t)this - compressed_register_base); }
// Return the bit which represents this register. This is intended
// to be ORed into a bitmask: for usage see class RegSet below.
@@ -131,7 +138,11 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
- max_slots_per_register = 2
+ max_slots_per_register = 2,
+
+ // C-Ext: float registers in the range [f8, f15] are the ones addressable by the 3-bit register fields of RVC instructions. Please see Table 16.2 in the spec.
+ compressed_register_base = 8,
+ compressed_register_top = 15,
};
// construction
@@ -144,8 +155,11 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int compressed_encoding() const { assert(is_compressed_valid(), "invalid compressed register"); return ((intptr_t)this - compressed_register_base); }
int encoding_nocheck() const { return (intptr_t)this; }
+ int compressed_encoding_nocheck() const { return ((intptr_t)this - compressed_register_base); }
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+ bool is_compressed_valid() const { return compressed_register_base <= (intptr_t)this && (intptr_t)this <= compressed_register_top; }
const char* name() const;
};
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 79cba765ea1..c2f8a955972 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1156,14 +1156,15 @@ bool needs_acquiring_load_reserved(const Node *n)
int MachCallStaticJavaNode::ret_addr_offset() {
- // call should be a simple jal
- int off = 4;
- return off;
+ // jal
+ return 1 * NativeInstruction::instruction_size;
}
int MachCallDynamicJavaNode::ret_addr_offset() {
- return 28; // movptr, jal
+ return 4 * NativeInstruction::instruction_size +
+ 2 * (!UseRVC ? NativeInstruction::instruction_size : NativeInstruction::compressed_instruction_size) +
+ 1 * NativeInstruction::instruction_size; // movptr, jal
}
int MachCallRuntimeNode::ret_addr_offset() {
@@ -1171,19 +1172,26 @@ int MachCallRuntimeNode::ret_addr_offset() {
// jal(addr)
// or with far branches
// jal(trampoline_stub)
- // for real runtime callouts it will be six instructions
+ // for real runtime callouts it will be 12 instructions
// see riscv64_enc_java_to_runtime
- // la(t1, retaddr)
- // la(t0, RuntimeAddress(addr))
- // addi(sp, sp, -2 * wordSize)
- // sd(zr, Address(sp))
- // sd(t1, Address(sp, wordSize))
- // jalr(t0)
+ // la(t1, retaddr) -> auipc + addi
+ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli(C) + addi + slli(C) + addi
+ // addi(sp, sp, -2 * wordSize) -> addi(C)
+ // sd(zr, Address(sp)) -> sd(C)
+ // sd(t1, Address(sp, wordSize)) -> sd(C)
+ // jalr(t0) -> jalr(C)
CodeBlob *cb = CodeCache::find_blob(_entry_point);
if (cb != NULL) {
return 1 * NativeInstruction::instruction_size;
} else {
- return 12 * NativeInstruction::instruction_size;
+ const int instruction_size = NativeInstruction::instruction_size;
+ const int compressed_instruction_size = (!UseRVC ? instruction_size : NativeInstruction::compressed_instruction_size);
+ return 2 * instruction_size +
+ 4 * instruction_size + 2 * compressed_instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * compressed_instruction_size;
}
}
@@ -1192,6 +1200,41 @@ int MachCallNativeNode::ret_addr_offset() {
return -1;
}
+// C-Ext: with the C extension a call site may be only 2-byte aligned.
+// The jal encodes its offset in bits [12, 31], so a misaligned jal could span a cache line.
+// Patching an instruction that spans a cache line is not a single atomic write,
+// while other threads may be running the same piece of code at full speed, causing concurrency issues.
+// So we must ensure the jal does not span a cache line so that it can be patched safely.
+int CallStaticJavaDirectNode::compute_padding(int current_offset) const
+{
+ // Make sure the address of the jal is 4-byte aligned.
+ return align_up(current_offset, alignment_required()) - current_offset;
+}
+
+// C-Ext: with the C extension a call site may be only 2-byte aligned.
+// The jal encodes its offset in bits [12, 31], so a misaligned jal could span a cache line.
+// Patching an instruction that spans a cache line is not a single atomic write,
+// while other threads may be running the same piece of code at full speed, causing concurrency issues.
+// So we must ensure the jal does not span a cache line so that it can be patched safely.
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
+{
+ // skip the movptr emitted by MacroAssembler::ic_call():
+ // lui + addi + slli(C) + addi + slli(C) + addi
+ // Though the movptr() size is already 4-byte aligned with or without C-Ext,
+ // we guard against future changes by calculating the size explicitly.
+ const int instruction_size = NativeInstruction::instruction_size;
+ const int compressed_instruction_size = (!UseRVC ? instruction_size : NativeInstruction::compressed_instruction_size);
+ const int movptr_size =
+ 2 * instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * instruction_size;
+ current_offset += movptr_size;
+ // Make sure the address of the jal is 4-byte aligned.
+ return align_up(current_offset, alignment_required()) - current_offset;
+}
+
//=============================================================================
#ifndef PRODUCT
@@ -1226,7 +1269,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
}
uint MachNopNode::size(PhaseRegAlloc*) const {
- return _count * NativeInstruction::instruction_size;
+ return _count * (!UseRVC ? NativeInstruction::instruction_size : NativeInstruction::compressed_instruction_size);
}
//=============================================================================
@@ -1295,7 +1338,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// insert a nop at the start of the prolog so we can patch in a
// branch if we need to invalidate the method later
- __ nop();
+ __ nop_nc(); // 4 bytes
assert_cond(C != NULL);
@@ -1387,7 +1430,14 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
__ remove_frame(framesize);
if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
- __ reserved_stack_check();
+ // C-Ext: we need to emit instructions of a constant size here.
+ // This node emits should_not_reach_here(), which in turn emits a movptr of the current pc().
+ // However, C2 first runs PhaseOutput::scratch_emit_size() to simulate the node's size -
+ // at that point pc() is a different value from the final emission, so the sequence may get compressed differently.
+ // The node's size could then differ between the scratch emission and the real emission,
+ // which is not allowed. So we emit the same constant size by disabling compression
+ // of the movptr of pc().
+ __ reserved_stack_check(NOT_COMPRESSIBLE);
}
if (do_polling() && C->is_method_compilation()) {
@@ -1644,7 +1694,8 @@ void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
int reg = ra_->get_encode(this);
if (is_imm_in_range(offset, 12, 0)) {
- __ addi(as_Register(reg), sp, offset);
+ // C-Ext: See BoxLockNode::size(). We need to manually calculate this node's size.
+ __ addi_nc(as_Register(reg), sp, offset); } else if (is_imm_in_range(offset, 32, 0)) { __ li32(t0, offset); __ add(as_Register(reg), sp, t0); @@ -9792,6 +9843,7 @@ instruct CallStaticJavaDirect(method meth) riscv64_enc_call_epilog ); ins_pipe(pipe_class_call); + ins_alignment(4); %} // TO HERE @@ -9811,6 +9863,7 @@ instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) riscv64_enc_call_epilog ); ins_pipe(pipe_class_call); + ins_alignment(4); %} // Call Runtime Instruction diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 506ff104603..dc9220652ad 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -348,9 +348,7 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ mv(c_rarg0, xmethod); __ mv(c_rarg1, ra); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), t0); // Explicit fence.i required because fixup_callers_callsite may change the code // stream. @@ -1020,9 +1018,7 @@ static void rt_call(MacroAssembler* masm, address dest) { if (cb) { __ far_call(RuntimeAddress(dest)); } else { - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(dest), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(dest), t0); } } @@ -1147,7 +1143,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int vep_offset = ((intptr_t)__ pc()) - start; // First instruction must be a nop as it may need to be patched on deoptimisation - __ nop(); + __ nop_nc(); gen_special_dispatch(masm, method, in_sig_bt, @@ -1298,7 +1294,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. 
- __ nop(); + __ nop_nc(); if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { Label L_skip_barrier; @@ -1799,9 +1795,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #ifndef PRODUCT assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), t0); // Restore any method result value restore_native_result(masm, ret_type, stack_slots); @@ -2018,9 +2012,7 @@ void SharedRuntime::generate_deopt_blob() { #endif // ASSERT __ mv(c_rarg0, xthread); __ mv(c_rarg1, xcpool); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), t0); __ bind(retaddr); // Need to have an oopmap that tells fetch_unroll_info where to @@ -2156,9 +2148,7 @@ void SharedRuntime::generate_deopt_blob() { __ mv(c_rarg0, xthread); __ mv(c_rarg1, xcpool); // second arg: exec_mode - offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), t0); // Set an oopmap for the call site // Use the same PC we used for the last java frame @@ -2242,11 +2232,9 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ mv(c_rarg0, xthread); __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); - int32_t offset = 0; - __ la_patchable(t0, + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, - Deoptimization::uncommon_trap)), offset); - __ jalr(x1, t0, offset); + Deoptimization::uncommon_trap)), t0); __ bind(retaddr); // Set an oopmap for the call site @@ -2368,9 +2356,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { // sp should already be aligned __ mv(c_rarg0, xthread); __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); - offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), t0); // Set an oopmap for the call site // Use the same PC we used for the last java frame @@ -2439,9 +2425,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // Do the call __ mv(c_rarg0, xthread); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(call_ptr), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(call_ptr), t0); __ bind(retaddr); // Set an oopmap for the call site. 
This oopmap will map all @@ -2549,9 +2533,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha __ set_last_Java_frame(sp, noreg, retaddr, t0); __ mv(c_rarg0, xthread); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(destination), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(destination), t0); __ bind(retaddr); } @@ -2688,9 +2670,7 @@ void OptoRuntime::generate_exception_blob() { address the_pc = __ pc(); __ set_last_Java_frame(sp, noreg, the_pc, t0); __ mv(c_rarg0, xthread); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), t0); // handle_exception_C is a special VM call which does not require an explicit diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index affb0f8b11e..2650c4f2a59 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -103,6 +103,12 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseMD5Intrinsics, false); } + // compressed instruction extension + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVC, false); + } + if (UseRVV) { if (!(_features & CPU_V)) { warning("RVV is not supported on this CPU"); diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index 63d44d7a7b6..853b7daf420 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -431,7 +431,7 @@ class PatchingStub: public CodeStub { NativeMovRegMem* n_move = nativeMovRegMem_at(pc_start()); n_move->set_offset(field_offset); // Copy will never get executed, so only copy the part which is required for patching. - _bytes_to_copy = MAX2(n_move->num_bytes_to_end_of_patch(), (int)NativeGeneralJump::instruction_size); + _bytes_to_copy = MAX2(n_move->num_bytes_to_end_of_patch(), NOT_RISCV((int)NativeGeneralJump::instruction_size) RISCV_ONLY(NativeGeneralJump::get_instruction_size())); } else if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) { assert(_obj != noreg, "must have register object for load_klass/load_mirror"); #ifdef ASSERT diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index f8ddd9d8d1b..754fdd2bcc8 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -40,7 +40,7 @@ void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_cod // We must have enough patching space so that call can be inserted. // We cannot use fat nops here, since the concurrent code rewrite may transiently // create the illegal instruction sequence. 
- while ((intx) _masm->pc() - (intx) patch->pc_start() < NativeGeneralJump::instruction_size) { + while ((intx) _masm->pc() - (intx) patch->pc_start() < NOT_RISCV(NativeGeneralJump::instruction_size) RISCV_ONLY(NativeGeneralJump::get_instruction_size()) ) { _masm->nop(); } patch->install(_masm, patch_code, obj, info); diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp index 5b24e062b18..d3fa1578f95 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp @@ -329,7 +329,7 @@ JVMFlag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) { } int minimum_alignment = 16; -#if defined(X86) && !defined(AMD64) +#if (defined(X86) && !defined(AMD64)) || defined(RISCV) minimum_alignment = 4; #elif defined(S390) minimum_alignment = 2;
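
A note for reviewers on the encoding rule that NativeInstruction::is_compressed_instr() relies on throughout this patch: in RISC-V, every 32-bit instruction has 0b11 in its two least-significant opcode bits, while the 16-bit RVC encodings use 0b00, 0b01, or 0b10, so the first halfword alone determines the instruction length. A minimal standalone sketch of that rule (plain C++ with illustrative names, not HotSpot code):

    #include <cstdint>
    #include <cstdio>

    // Returns the byte length of a RISC-V instruction, judging only by its
    // first (lowest-addressed) halfword: 16-bit RVC encodings end in 00/01/10,
    // all 32-bit base encodings end in 11.
    static int rv_instr_length(uint16_t first_halfword) {
      return (first_halfword & 0b11) == 0b11 ? 4 : 2;
    }

    int main() {
      printf("%d\n", rv_instr_length(0x0001)); // c.nop                        -> 2
      printf("%d\n", rv_instr_length(0x0013)); // low halfword of addi x0,x0,0 -> 4
      return 0;
    }

The same constants explain why is_compressed_nop() compares the halfword against 0x1 (c.nop) while is_uncompressed_nop() compares the word against 0x13 (addi x0, x0, 0).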
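The compressed_encoding() accessors added to RegisterImpl and FloatRegisterImpl capture the RVC convention that the 3-bit register fields of the compressed formats can only name x8-x15 (or f8-f15), encoded as 0-7. A hedged sketch of that mapping (illustrative function, not the HotSpot accessor):

    #include <cassert>
    #include <cstdio>

    // Map a full 5-bit register number to the 3-bit field used by the
    // CIW/CL/CS/CA/CB compressed formats: x8..x15 (f8..f15) encode as 0..7.
    static int rvc_reg_encoding(int regno) {
      assert(regno >= 8 && regno <= 15 && "only x8..x15 / f8..f15 have a 3-bit RVC encoding");
      return regno - 8;
    }

    int main() {
      printf("x8  -> %d\n", rvc_reg_encoding(8));  // 0
      printf("x15 -> %d\n", rvc_reg_encoding(15)); // 7
      return 0;
    }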
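The adjusted size constants and ret_addr_offset() values are plain byte arithmetic over the emitted sequences. The sketch below (illustrative constants, assuming, as the patch comments state, that only the two slli's of a movptr are compressed) reproduces the numbers used above: 24 vs. 20 bytes for movptr, and 28 vs. 24 bytes for MachCallDynamicJavaNode::ret_addr_offset(), i.e. movptr plus jal:

    #include <cstdio>

    const int kInstr  = 4; // uncompressed instruction width in bytes
    const int kCInstr = 2; // compressed (RVC) instruction width in bytes

    // movptr emits: lui, addi, slli, addi, slli, addi.
    // With RVC enabled only the two slli's are emitted in compressed form.
    static int movptr_size(bool use_rvc) {
      return 4 * kInstr + 2 * (use_rvc ? kCInstr : kInstr);
    }

    int main() {
      printf("movptr, no RVC:          %d\n", movptr_size(false));          // 24
      printf("movptr, RVC:             %d\n", movptr_size(true));           // 20
      printf("ret_addr_offset, no RVC: %d\n", movptr_size(false) + kInstr); // 28
      printf("ret_addr_offset, RVC:    %d\n", movptr_size(true) + kInstr);  // 24
      return 0;
    }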
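Finally, both compute_padding() overrides reduce to rounding an offset up to a 4-byte boundary so the patchable jal cannot span a cache line; the dynamic-call variant first skips the movptr emitted by ic_call(). A small sketch of that arithmetic (illustrative align_up helper, mirroring the logic assumed above):

    #include <cstdio>

    // Round x up to the next multiple of the power-of-two alignment a.
    static int align_up(int x, int a) { return (x + a - 1) & -a; }

    // Padding inserted before a static call so its jal is 4-byte aligned.
    static int static_call_padding(int current_offset) {
      return align_up(current_offset, 4) - current_offset;
    }

    // Padding for a dynamic call: skip the movptr from ic_call() (20 bytes
    // with RVC, 24 without), then align the jal that follows it.
    static int dynamic_call_padding(int current_offset, bool use_rvc) {
      const int movptr = 4 * 4 + 2 * (use_rvc ? 2 : 4);
      current_offset += movptr;
      return align_up(current_offset, 4) - current_offset;
    }

    int main() {
      printf("%d\n", static_call_padding(6));        // 2 (6 -> 8)
      printf("%d\n", dynamic_call_padding(6, true)); // 2 (6 + 20 = 26 -> 28)
      return 0;
    }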