From 54113e77d8bbd34dc87a0b92047e91e45c2962bf Mon Sep 17 00:00:00 2001
From: "yunyao.zxl" <yunyao.zxl@alibaba-inc.com>
Date: Wed, 29 Sep 2021 21:29:48 +0800
Subject: [PATCH 1/3] [C-Ext] Fix a call site in MacroAssembler::far_call()
 that needs an uncompressed jal

---
 src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp
index 97953cafb0e..5d6956c62c4 100644
--- a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp
+++ b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp
@@ -2762,7 +2762,7 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
     jalr_nc(x1, tmp, offset); // link
   } else {
     if (cbuf != NULL) { cbuf->set_insts_mark(); }
-    jal(entry); // link
+    jal_nc(entry); // link
   }
 }
 

From 70b506db563e7e950fdb72154b88a8eb2abf5d34 Mon Sep 17 00:00:00 2001
From: "yunyao.zxl" <yunyao.zxl@alibaba-inc.com>
Date: Mon, 11 Oct 2021 10:55:56 +0800
Subject: [PATCH 2/3] [C-Ext] Fix trampolines' alignment

---
 src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp
index 5d6956c62c4..ed447fc9eb8 100644
--- a/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp
+++ b/src/hotspot/cpu/riscv64/macroAssembler_riscv64.cpp
@@ -3193,7 +3193,9 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
 
   // make sure 4 byte aligned here, so that the destination address would be
   // 8 byte aligned after 3 intructions
-  while (offset() % wordSize == 0) { nop(); }
+  // C-Ext: the offset here may be only 2-byte aligned, and nop() is then
+  //   2 bytes long, so pad until the offset is 4 past an 8-byte boundary.
+  while (offset() % wordSize != 4) { nop(); }
 
   relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
                                             insts_call_instruction_offset));
@@ -3208,6 +3210,7 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
   bind(target);
   assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
          "should be");
+  assert(offset() % wordSize == 0, "address loaded by ld must be 8-byte aligned under riscv64");
   emit_int64((intptr_t)dest);
 
   const address stub_start_addr = addr_at(stub_start_offset);

From ed248991f0ef7cadefca6a80fe39e146860d3698 Mon Sep 17 00:00:00 2001
From: "yunyao.zxl" <yunyao.zxl@alibaba-inc.com>
Date: Thu, 14 Oct 2021 11:06:47 +0800
Subject: [PATCH 3/3] [C-Ext] Fix the cache line spanning problem for patchable
 CallNodes

---
 .../cpu/riscv64/c1_LIRAssembler_riscv64.cpp   |  7 +++-
 src/hotspot/cpu/riscv64/riscv64.ad            | 37 +++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp b/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp
index 969f39af704..5a08e53d676 100644
--- a/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp
+++ b/src/hotspot/cpu/riscv64/c1_LIRAssembler_riscv64.cpp
@@ -1334,7 +1334,12 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
   }
 }
 
-void LIR_Assembler::align_call(LIR_Code code) {  }
+void LIR_Assembler::align_call(LIR_Code code) {
+  // C-Ext: with C-Ext enabled, a call could otherwise end up only 2-byte aligned.
+  //   The jal itself is patched later, so its address must not span a cache line.
+  //   See CallDynamicJavaDirectNode::compute_padding() for more info.
+  __ align(4);
+}
 
 void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
   address call = __ trampoline_call(Address(op->addr(), rtype));
diff --git a/src/hotspot/cpu/riscv64/riscv64.ad b/src/hotspot/cpu/riscv64/riscv64.ad
index ac94e14136a..2214af91398 100644
--- a/src/hotspot/cpu/riscv64/riscv64.ad
+++ b/src/hotspot/cpu/riscv64/riscv64.ad
@@ -1208,6 +1208,41 @@ int MachCallNativeNode::ret_addr_offset() {
   return -1;
 }
 
+// C-Ext: with C-Ext enabled, a call may end up only 2-byte aligned.
+//   The offset encoded in jal occupies bits [12, 31], so such a jal could span a cache line.
+//   Patching an instruction at such an unaligned address is not an atomic write.
+//   Other threads may be running the same piece of code at full speed, causing concurrency issues.
+//   So the jal must not span a cache line; this returns the padding to insert before the call.
+int CallStaticJavaDirectNode::compute_padding(int current_offset) const
+{
+  // Round current_offset up to alignment_required() so that the jal is 4-byte aligned.
+  return align_up(current_offset, alignment_required()) - current_offset;
+}
+
+// C-Ext: with C-Ext enabled, a call may end up only 2-byte aligned.
+//   The offset encoded in jal occupies bits [12, 31], so such a jal could span a cache line.
+//   Patching an instruction at such an unaligned address is not an atomic write.
+//   Other threads may be running the same piece of code at full speed, causing concurrency issues.
+//   So the jal must not span a cache line; this returns the padding to insert before the call.
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
+{
+  // Skip the movptr emitted in MacroAssembler::ic_call() ahead of the jal:
+  // lui + addi + slli(C) + addi + slli(C) + addi
+  // Although movptr() is already 4-byte aligned in size with or without C-Ext,
+  // compute its size explicitly so that later changes to it cannot break the alignment.
+  const int instruction_size = NativeInstruction::instruction_size;
+  const int compressed_instruction_size = (!UseCExt ? instruction_size : NativeInstruction::compressed_instruction_size);
+  const int movptr_size =
+         2 * instruction_size +
+         1 * compressed_instruction_size +
+         1 * instruction_size +
+         1 * compressed_instruction_size +
+         1 * instruction_size;
+  current_offset += movptr_size;
+  // Round the post-movptr offset up to alignment_required() so that the jal is 4-byte aligned.
+  return align_up(current_offset, alignment_required()) - current_offset;
+}
+
 //=============================================================================
 
 #ifndef PRODUCT
@@ -9760,6 +9795,7 @@ instruct CallStaticJavaDirect(method meth)
               riscv64_enc_call_epilog );
 
   ins_pipe(pipe_class_call);
+  ins_alignment(4);
 %}
 
 // TO HERE
@@ -9779,6 +9815,7 @@ instruct CallDynamicJavaDirect(method meth, iRegL_R6 cr)
                riscv64_enc_call_epilog );
 
   ins_pipe(pipe_class_call);
+  ins_alignment(4);
 %}
 
 // Call Runtime Instruction