From 362ab57879e8e888d7bd4fe29b2b81c8472ee5ed Mon Sep 17 00:00:00 2001 From: "yunyao.zxl" Date: Mon, 8 Nov 2021 16:24:11 +0800 Subject: [PATCH] Support RVC: compressed instructions --- src/hotspot/cpu/riscv/assembler_riscv.cpp | 138 ++- src/hotspot/cpu/riscv/assembler_riscv.hpp | 281 +++--- .../cpu/riscv/assembler_riscv_cext.hpp | 865 ++++++++++++++++++ src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 8 +- .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 15 +- .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 2 +- src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 8 +- .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 16 +- .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 10 + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 2 +- .../riscv/c2_safepointPollStubTable_riscv.cpp | 2 +- src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 8 +- .../gc/shared/barrierSetAssembler_riscv.cpp | 30 +- .../gc/shared/barrierSetNMethod_riscv.cpp | 93 +- .../riscv/gc/z/zBarrierSetAssembler_riscv.cpp | 4 +- src/hotspot/cpu/riscv/globals_riscv.hpp | 5 +- src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 4 +- .../cpu/riscv/jniFastGetField_riscv.cpp | 8 +- .../cpu/riscv/macroAssembler_riscv.cpp | 248 +++-- .../cpu/riscv/macroAssembler_riscv.hpp | 31 +- src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 173 +++- src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 188 +++- src/hotspot/cpu/riscv/register_riscv.hpp | 18 +- src/hotspot/cpu/riscv/riscv.ad | 85 +- src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 46 +- src/hotspot/cpu/riscv/vm_version_riscv.cpp | 6 + src/hotspot/share/c1/c1_CodeStubs.hpp | 2 +- src/hotspot/share/c1/c1_LIRAssembler.cpp | 2 +- .../flags/jvmFlagConstraintsCompiler.cpp | 2 +- 29 files changed, 1810 insertions(+), 490 deletions(-) create mode 100644 src/hotspot/cpu/riscv/assembler_riscv_cext.hpp diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp index b4da68e3202..1bd7588b983 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -34,6 +34,7 @@ #include "memory/resourceArea.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/sharedRuntime.hpp" +#include "nativeInst_riscv.hpp" int AbstractAssembler::code_fill_byte() { return 0; @@ -80,6 +81,11 @@ void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) } void Assembler::li(Register Rd, int64_t imm) { + if (UseRVC && is_imm_in_range(imm, 6, 0) && Rd != x0) { + li_c(Rd, imm); + return; + } + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff int shift = 12; int64_t upper = imm, lower = imm; @@ -124,18 +130,18 @@ void Assembler::li64(Register Rd, int64_t imm) { lo = (lo << 52) >> 52; up -= lo; up = (int32_t)up; - lui(Rd, up); - addi(Rd, Rd, lo); + lui_nc(Rd, up); + addi_nc(Rd, Rd, lo); // Load the rest 32 bits. 
slli(Rd, Rd, 12); - addi(Rd, Rd, (int32_t)lower >> 20); + addi_nc(Rd, Rd, (int32_t)lower >> 20); slli(Rd, Rd, 12); lower = ((int32_t)imm << 12) >> 20; - addi(Rd, Rd, lower); + addi_nc(Rd, Rd, lower); slli(Rd, Rd, 8); lower = imm & 0xff; - addi(Rd, Rd, lower); + addi_nc(Rd, Rd, lower); } void Assembler::li32(Register Rd, int32_t imm) { @@ -145,40 +151,48 @@ void Assembler::li32(Register Rd, int32_t imm) { upper -= lower; upper = (int32_t)upper; // lui Rd, imm[31:12] + imm[11] - lui(Rd, upper); + lui_nc(Rd, upper); // use addiw to distinguish li32 to li64 - addiw(Rd, Rd, lower); + addiw_nc(Rd, Rd, lower); } -#define INSN(NAME, REGISTER) \ +#define INSN(NAME, REGISTER, C) \ void Assembler::NAME(const address &dest, Register temp) { \ assert_cond(dest != NULL); \ int64_t distance = dest - pc(); \ if (is_imm_in_range(distance, 20, 1)) { \ - jal(REGISTER, distance); \ + EMIT_MAY_COMPRESS_NAME(C, jal, (REGISTER, distance)); \ } else { \ assert(temp != noreg, "temp must not be empty register!"); \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ - jalr(REGISTER, temp, offset); \ + movptr_with_offset(temp, dest, offset, C); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, temp, offset)); \ } \ } \ void Assembler::NAME(Label &l, Register temp) { \ - jal(REGISTER, l, temp); \ + EMIT_MAY_COMPRESS_NAME(C, jal, (REGISTER, l, temp)); \ } \ - INSN(j, x0); - INSN(jal, x1); + INSN(j, x0, COMPRESSIBLE); + INSN(jal, x1, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(j_nc, x0, NOT_COMPRESSIBLE); + INSN(jal_nc, x1, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, REGISTER) \ +#define INSN(NAME, REGISTER, C) \ void Assembler::NAME(Register Rs) { \ - jalr(REGISTER, Rs, 0); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, Rs, 0)); \ } - INSN(jr, x0); - INSN(jalr, x1); + INSN(jr, x0, COMPRESSIBLE); + INSN(jalr, x1, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(jr_nc, x0, NOT_COMPRESSIBLE); + INSN(jalr_nc, x1, NOT_COMPRESSIBLE); #undef INSN @@ -186,32 +200,36 @@ void Assembler::ret() { jalr(x0, x1, 0); } -#define INSN(NAME, REGISTER) \ - void Assembler::NAME(const address &dest, Register temp) { \ - assert_cond(dest != NULL); \ - assert(temp != noreg, "temp must not be empty register!"); \ - int64_t distance = dest - pc(); \ - if (is_offset_in_range(distance, 32)) { \ - auipc(temp, distance + 0x800); \ - jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20); \ - } else { \ - int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ - jalr(REGISTER, temp, offset); \ - } \ +#define INSN(NAME, REGISTER, C) \ + void Assembler::NAME(const address &dest, Register temp) { \ + assert_cond(dest != NULL); \ + assert(temp != noreg, "temp must not be empty register!"); \ + int64_t distance = dest - pc(); \ + if (is_offset_in_range(distance, 32)) { \ + auipc(temp, distance + 0x800); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, temp, ((int32_t)distance << 20) >> 20)); \ + } else { \ + int32_t offset = 0; \ + movptr_with_offset(temp, dest, offset, C); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (REGISTER, temp, offset)); \ + } \ } - INSN(call, x1); - INSN(tail, x0); + INSN(call, x1, COMPRESSIBLE); + INSN(tail, x0, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(call_nc, x1, NOT_COMPRESSIBLE); + INSN(tail_nc, x0, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, REGISTER) \ +#define INSN(NAME, REGISTER, NAME_NC) \ void Assembler::NAME(const Address &adr, Register temp) { \ switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - 
NAME(adr.target(), temp); \ + NAME_NC(adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -225,10 +243,14 @@ void Assembler::ret() { } \ } - INSN(j, x0); - INSN(jal, x1); - INSN(call, x1); - INSN(tail, x0); + INSN(j, x0, j_nc); + INSN(jal, x1, jal_nc); + INSN(call, x1, call_nc); + INSN(tail, x0, tail_nc); + + // C-Ext: incompressible version + INSN(j_nc, x0, j_nc); + INSN(jal_nc, x1, jal_nc); #undef INSN @@ -237,7 +259,7 @@ void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branc if (is_far) { Label done; (this->*neg_insn)(r1, r2, done, /* is_far */ false); - j(L); + j_nc(L); bind(done); } else { if (L.is_bound()) { @@ -267,7 +289,25 @@ void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { } } -void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { +void Assembler::wrap_label(Label &L, j_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } +} + +void Assembler::wrap_label(Label &L, Register r, compare_and_branch_c_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc()); + } +} + +void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset, bool compressible) { uintptr_t imm64 = (uintptr_t)addr; #ifndef PRODUCT { @@ -283,26 +323,26 @@ void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { lower = (lower << 52) >> 52; upper -= lower; upper = (int32_t)upper; - lui(Rd, upper); - addi(Rd, Rd, lower); + EMIT_MAY_COMPRESS_INST(compressible, lui, (Rd, upper)); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, lower)); // Load the rest 16 bits. slli(Rd, Rd, 11); - addi(Rd, Rd, (imm64 >> 5) & 0x7ff); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, (imm64 >> 5) & 0x7ff)); slli(Rd, Rd, 5); // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. 
offset = imm64 & 0x1f; } -void Assembler::movptr(Register Rd, uintptr_t imm64) { - movptr(Rd, (address)imm64); +void Assembler::movptr(Register Rd, uintptr_t imm64, bool compressible) { + movptr(Rd, (address)imm64, compressible); } -void Assembler::movptr(Register Rd, address addr) { +void Assembler::movptr(Register Rd, address addr, bool compressible) { int offset = 0; - movptr_with_offset(Rd, addr, offset); - addi(Rd, Rd, offset); + movptr_with_offset(Rd, addr, offset, compressible); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, offset)); } void Assembler::ifence() { diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp index 3564b3669b5..3da5009c7e6 100644 --- a/src/hotspot/cpu/riscv/assembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp @@ -255,6 +255,7 @@ class InternalAddress: public Address { class Assembler : public AbstractAssembler { public: +#include "assembler_riscv_cext.hpp" enum { instruction_size = 4 }; @@ -304,9 +305,9 @@ class Assembler : public AbstractAssembler { void li(Register Rd, int64_t imm); // optimized load immediate void li32(Register Rd, int32_t imm); void li64(Register Rd, int64_t imm); - void movptr(Register Rd, address addr); - void movptr_with_offset(Register Rd, address addr, int32_t &offset); - void movptr(Register Rd, uintptr_t imm64); + void movptr(Register Rd, address addr, bool COMPRESSIBLE = true); + void movptr_with_offset(Register Rd, address addr, int32_t &offset, bool COMPRESSIBLE = true); + void movptr(Register Rd, uintptr_t imm64, bool COMPRESSIBLE = true); void ifence(); void j(const address &dest, Register temp = t0); void j(const Address &adr, Register temp = t0) ; @@ -379,12 +380,21 @@ class Assembler : public AbstractAssembler { } void halt() { - emit_int32(0); + if (UseRVC) { + emit_int16(0); + } else { + emit_int32(0); + } } -// Rigster Instruction -#define INSN(NAME, op, funct3, funct7) \ +// two C-Ext macros +#define COMPRESSIBLE true +#define NOT_COMPRESSIBLE false + +// Register Instruction +#define INSN(NAME, op, funct3, funct7, C) \ void NAME(Register Rd, Register Rs1, Register Rs2) { \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs1, Rs2) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -395,37 +405,37 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(add, 0b0110011, 0b000, 0b0000000); - INSN(sub, 0b0110011, 0b000, 0b0100000); - INSN(andr, 0b0110011, 0b111, 0b0000000); - INSN(orr, 0b0110011, 0b110, 0b0000000); - INSN(xorr, 0b0110011, 0b100, 0b0000000); - INSN(sll, 0b0110011, 0b001, 0b0000000); - INSN(sra, 0b0110011, 0b101, 0b0100000); - INSN(srl, 0b0110011, 0b101, 0b0000000); - INSN(slt, 0b0110011, 0b010, 0b0000000); - INSN(sltu, 0b0110011, 0b011, 0b0000000); - INSN(addw, 0b0111011, 0b000, 0b0000000); - INSN(subw, 0b0111011, 0b000, 0b0100000); - INSN(sllw, 0b0111011, 0b001, 0b0000000); - INSN(sraw, 0b0111011, 0b101, 0b0100000); - INSN(srlw, 0b0111011, 0b101, 0b0000000); - INSN(mul, 0b0110011, 0b000, 0b0000001); - INSN(mulh, 0b0110011, 0b001, 0b0000001); - INSN(mulhsu,0b0110011, 0b010, 0b0000001); - INSN(mulhu, 0b0110011, 0b011, 0b0000001); - INSN(mulw, 0b0111011, 0b000, 0b0000001); - INSN(div, 0b0110011, 0b100, 0b0000001); - INSN(divu, 0b0110011, 0b101, 0b0000001); - INSN(divw, 0b0111011, 0b100, 0b0000001); - INSN(divuw, 0b0111011, 0b101, 0b0000001); - INSN(rem, 0b0110011, 0b110, 0b0000001); - INSN(remu, 0b0110011, 0b111, 0b0000001); - INSN(remw, 0b0111011, 0b110, 0b0000001); - INSN(remuw, 0b0111011, 
0b111, 0b0000001); + INSN(add, 0b0110011, 0b000, 0b0000000, COMPRESSIBLE); + INSN(sub, 0b0110011, 0b000, 0b0100000, COMPRESSIBLE); + INSN(andr, 0b0110011, 0b111, 0b0000000, COMPRESSIBLE); + INSN(orr, 0b0110011, 0b110, 0b0000000, COMPRESSIBLE); + INSN(xorr, 0b0110011, 0b100, 0b0000000, COMPRESSIBLE); + INSN(sll, 0b0110011, 0b001, 0b0000000, NOT_COMPRESSIBLE); + INSN(sra, 0b0110011, 0b101, 0b0100000, NOT_COMPRESSIBLE); + INSN(srl, 0b0110011, 0b101, 0b0000000, NOT_COMPRESSIBLE); + INSN(slt, 0b0110011, 0b010, 0b0000000, NOT_COMPRESSIBLE); + INSN(sltu, 0b0110011, 0b011, 0b0000000, NOT_COMPRESSIBLE); + INSN(addw, 0b0111011, 0b000, 0b0000000, COMPRESSIBLE); + INSN(subw, 0b0111011, 0b000, 0b0100000, COMPRESSIBLE); + INSN(sllw, 0b0111011, 0b001, 0b0000000, NOT_COMPRESSIBLE); + INSN(sraw, 0b0111011, 0b101, 0b0100000, NOT_COMPRESSIBLE); + INSN(srlw, 0b0111011, 0b101, 0b0000000, NOT_COMPRESSIBLE); + INSN(mul, 0b0110011, 0b000, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulh, 0b0110011, 0b001, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulhsu,0b0110011, 0b010, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulhu, 0b0110011, 0b011, 0b0000001, NOT_COMPRESSIBLE); + INSN(mulw, 0b0111011, 0b000, 0b0000001, NOT_COMPRESSIBLE); + INSN(div, 0b0110011, 0b100, 0b0000001, NOT_COMPRESSIBLE); + INSN(divu, 0b0110011, 0b101, 0b0000001, NOT_COMPRESSIBLE); + INSN(divw, 0b0111011, 0b100, 0b0000001, NOT_COMPRESSIBLE); + INSN(divuw, 0b0111011, 0b101, 0b0000001, NOT_COMPRESSIBLE); + INSN(rem, 0b0110011, 0b110, 0b0000001, NOT_COMPRESSIBLE); + INSN(remu, 0b0110011, 0b111, 0b0000001, NOT_COMPRESSIBLE); + INSN(remw, 0b0111011, 0b110, 0b0000001, NOT_COMPRESSIBLE); + INSN(remuw, 0b0111011, 0b111, 0b0000001, NOT_COMPRESSIBLE); // Vector Configuration Instruction - INSN(vsetvl, 0b1010111, 0b111, 0b1000000); + INSN(vsetvl, 0b1010111, 0b111, 0b1000000, NOT_COMPRESSIBLE); #undef INSN @@ -437,10 +447,11 @@ class Assembler : public AbstractAssembler { code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); // Load/store register (all modes) -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, NAME_NC, C) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs, offset) \ + unsigned insn = 0; \ int32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -457,18 +468,18 @@ class Assembler : public AbstractAssembler { NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(Rd, dest, offset); \ + movptr_with_offset(Rd, dest, offset, C); \ NAME(Rd, Rd, offset); \ } \ } \ INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \ - NAME(Rd, dest); \ + NAME_NC(Rd, dest); \ } \ void NAME(Register Rd, const Address &adr, Register temp = t0) { \ switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rd, adr.target()); \ + NAME_NC(Rd, adr.target()); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -494,20 +505,24 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, &Assembler::NAME); \ } - INSN(lb, 0b0000011, 0b000); - INSN(lbu, 0b0000011, 0b100); - INSN(ld, 0b0000011, 0b011); - INSN(lh, 0b0000011, 0b001); - INSN(lhu, 0b0000011, 0b101); - INSN(lw, 0b0000011, 0b010); - INSN(lwu, 0b0000011, 0b110); + INSN(lb, 0b0000011, 0b000, lb, NOT_COMPRESSIBLE); + INSN(lbu, 0b0000011, 0b100, lbu, NOT_COMPRESSIBLE); + INSN(lh, 0b0000011, 
0b001, lh, NOT_COMPRESSIBLE); + INSN(lhu, 0b0000011, 0b101, lhu, NOT_COMPRESSIBLE); + INSN(lw, 0b0000011, 0b010, lw_nc, COMPRESSIBLE); + INSN(lwu, 0b0000011, 0b110, lwu, NOT_COMPRESSIBLE); + INSN(ld, 0b0000011, 0b011, ld_nc, COMPRESSIBLE); + // C-Ext: incompressible version + INSN(lw_nc, 0b0000011, 0b010, lw_nc, NOT_COMPRESSIBLE); + INSN(ld_nc, 0b0000011, 0b011, ld_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, NAME_NC, C) \ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs, offset) \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -524,18 +539,18 @@ class Assembler : public AbstractAssembler { NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ + movptr_with_offset(temp, dest, offset, C); \ NAME(Rd, temp, offset); \ } \ } \ INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \ - NAME(Rd, dest, temp); \ + NAME_NC(Rd, dest, temp); \ } \ void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rd, adr.target(), temp); \ + NAME_NC(Rd, adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -553,14 +568,18 @@ class Assembler : public AbstractAssembler { } \ } - INSN(flw, 0b0000111, 0b010); - INSN(fld, 0b0000111, 0b011); + INSN(flw, 0b0000111, 0b010, flw, NOT_COMPRESSIBLE); + INSN(fld, 0b0000111, 0b011, fld_nc, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(fld_nc, 0b0000111, 0b011, fld_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, NAME_NC, C) \ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rs1, Rs2, offset) \ + unsigned insn = 0; \ uint32_t val = offset & 0x1fff; \ uint32_t val11 = (val >> 11) & 0x1; \ uint32_t val12 = (val >> 12) & 0x1; \ @@ -583,15 +602,19 @@ class Assembler : public AbstractAssembler { NAME(Rs1, Rs2, offset); \ } \ INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \ - NAME(Rs1, Rs2, dest); \ + NAME_NC(Rs1, Rs2, dest); \ } - INSN(beq, 0b1100011, 0b000); - INSN(bge, 0b1100011, 0b101); - INSN(bgeu, 0b1100011, 0b111); - INSN(blt, 0b1100011, 0b100); - INSN(bltu, 0b1100011, 0b110); - INSN(bne, 0b1100011, 0b001); + INSN(beq, 0b1100011, 0b000, beq_nc, COMPRESSIBLE); + INSN(bne, 0b1100011, 0b001, bne_nc, COMPRESSIBLE); + INSN(bge, 0b1100011, 0b101, bge, NOT_COMPRESSIBLE); + INSN(bgeu, 0b1100011, 0b111, bgeu, NOT_COMPRESSIBLE); + INSN(blt, 0b1100011, 0b100, blt, NOT_COMPRESSIBLE); + INSN(bltu, 0b1100011, 0b110, bltu, NOT_COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(beq_nc, 0b1100011, 0b000, beq_nc, NOT_COMPRESSIBLE); + INSN(bne_nc, 0b1100011, 0b001, bne_nc, NOT_COMPRESSIBLE); #undef INSN @@ -607,12 +630,17 @@ class Assembler : public AbstractAssembler { INSN(bltu, bgeu); INSN(bgeu, bltu); + // C-Ext: incompressible version + INSN(beq_nc, bne_nc); + INSN(bne_nc, beq_nc); + #undef INSN -#define INSN(NAME, REGISTER, op, funct3) \ +#define INSN(NAME, REGISTER, op, funct3, 
NAME_NC, C) \ void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rs1, Rs2, offset) \ + unsigned insn = 0; \ uint32_t val = offset & 0xfff; \ uint32_t low = val & 0x1f; \ uint32_t high = (val >> 5) & 0x7f; \ @@ -625,19 +653,24 @@ class Assembler : public AbstractAssembler { emit(insn); \ } \ INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ - NAME(Rs, dest, temp); \ + NAME_NC(Rs, dest, temp); \ } - INSN(sb, Register, 0b0100011, 0b000); - INSN(sh, Register, 0b0100011, 0b001); - INSN(sw, Register, 0b0100011, 0b010); - INSN(sd, Register, 0b0100011, 0b011); - INSN(fsw, FloatRegister, 0b0100111, 0b010); - INSN(fsd, FloatRegister, 0b0100111, 0b011); + INSN(sb, Register, 0b0100011, 0b000, sb, NOT_COMPRESSIBLE); + INSN(sh, Register, 0b0100011, 0b001, sh, NOT_COMPRESSIBLE); + INSN(sw, Register, 0b0100011, 0b010, sw_nc, COMPRESSIBLE); + INSN(sd, Register, 0b0100011, 0b011, sd_nc, COMPRESSIBLE); + INSN(fsw, FloatRegister, 0b0100111, 0b010, fsw, NOT_COMPRESSIBLE); + INSN(fsd, FloatRegister, 0b0100111, 0b011, fsd_nc, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(sw_nc, Register, 0b0100011, 0b010, sw_nc, NOT_COMPRESSIBLE); + INSN(sd_nc, Register, 0b0100011, 0b011, sd_nc, NOT_COMPRESSIBLE); + INSN(fsd_nc, FloatRegister, 0b0100111, 0b011, fsd_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME) \ +#define INSN(NAME, NAME_NC, C) \ void NAME(Register Rs, address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ assert_different_registers(Rs, temp); \ @@ -647,7 +680,7 @@ class Assembler : public AbstractAssembler { NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ + movptr_with_offset(temp, dest, offset, C); \ NAME(Rs, temp, offset); \ } \ } \ @@ -656,7 +689,7 @@ class Assembler : public AbstractAssembler { case Address::literal: { \ assert_different_registers(Rs, temp); \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rs, adr.target(), temp); \ + NAME_NC(Rs, adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -675,14 +708,18 @@ class Assembler : public AbstractAssembler { } \ } - INSN(sb); - INSN(sh); - INSN(sw); - INSN(sd); + INSN(sb, sb, NOT_COMPRESSIBLE); + INSN(sh, sh, NOT_COMPRESSIBLE); + INSN(sw, sw_nc, COMPRESSIBLE); + INSN(sd, sd_nc, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(sw_nc, sw_nc, NOT_COMPRESSIBLE); + INSN(sd_nc, sd_nc, NOT_COMPRESSIBLE); #undef INSN -#define INSN(NAME) \ +#define INSN(NAME, NAME_NC, C) \ void NAME(FloatRegister Rs, address dest, Register temp = t0) { \ assert_cond(dest != NULL); \ int64_t distance = (dest - pc()); \ @@ -691,7 +728,7 @@ class Assembler : public AbstractAssembler { NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ } else { \ int32_t offset = 0; \ - movptr_with_offset(temp, dest, offset); \ + movptr_with_offset(temp, dest, offset, C); \ NAME(Rs, temp, offset); \ } \ } \ @@ -699,7 +736,7 @@ class Assembler : public AbstractAssembler { switch(adr.getMode()) { \ case Address::literal: { \ code_section()->relocate(pc(), adr.rspec()); \ - NAME(Rs, adr.target(), temp); \ + NAME_NC(Rs, adr.target(), temp); \ break; \ } \ case Address::base_plus_offset:{ \ @@ -717,8 +754,11 @@ class Assembler : public AbstractAssembler { } \ } - INSN(fsw); - INSN(fsd); + INSN(fsw, fsw, NOT_COMPRESSIBLE); + INSN(fsd, fsd_nc, COMPRESSIBLE); + + // C-Ext: 
incompressible version + INSN(fsd_nc, fsd_nc, NOT_COMPRESSIBLE); #undef INSN @@ -760,10 +800,11 @@ class Assembler : public AbstractAssembler { #undef INSN -#define INSN(NAME, op) \ +#define INSN(NAME, op, C) \ void NAME(Register Rd, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, offset) \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -780,8 +821,8 @@ class Assembler : public AbstractAssembler { } else { \ assert_different_registers(Rd, temp); \ int32_t off = 0; \ - movptr_with_offset(temp, dest, off); \ - jalr(Rd, temp, off); \ + movptr_with_offset(temp, dest, off, C); \ + EMIT_MAY_COMPRESS_NAME(C, jalr, (Rd, temp, off)); \ } \ } \ void NAME(Register Rd, Label &L, Register temp = t0) { \ @@ -789,16 +830,20 @@ class Assembler : public AbstractAssembler { wrap_label(Rd, L, temp, &Assembler::NAME); \ } - INSN(jal, 0b1101111); + INSN(jal, 0b1101111, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(jal_nc, 0b1101111, NOT_COMPRESSIBLE); #undef INSN #undef INSN_ENTRY_RELOC -#define INSN(NAME, op, funct) \ +#define INSN(NAME, op, funct, C) \ void NAME(Register Rd, Register Rs, const int32_t offset) { \ - unsigned insn = 0; \ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs, offset) \ + unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch_reg((address)&insn, 7, Rd); \ patch((address)&insn, 14, 12, funct); \ @@ -808,7 +853,10 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(jalr, 0b1100111, 0b000); + INSN(jalr, 0b1100111, 0b000, COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(jalr_nc, 0b1100111, 0b000, NOT_COMPRESSIBLE); #undef INSN @@ -831,8 +879,9 @@ class Assembler : public AbstractAssembler { emit(insn); } -#define INSN(NAME, op, funct3, funct7) \ +#define INSN(NAME, op, funct3, funct7, C) \ void NAME() { \ + EMIT_MAY_COMPRESS(C, NAME) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 11, 7, 0b00000); \ @@ -842,9 +891,9 @@ class Assembler : public AbstractAssembler { emit(insn); \ } - INSN(fence_i, 0b0001111, 0b001, 0b000000000000); - INSN(ecall, 0b1110011, 0b000, 0b000000000000); - INSN(ebreak, 0b1110011, 0b000, 0b000000000001); + INSN(fence_i, 0b0001111, 0b001, 0b000000000000, NOT_COMPRESSIBLE); + INSN(ecall, 0b1110011, 0b000, 0b000000000000, NOT_COMPRESSIBLE); + INSN(ebreak, 0b1110011, 0b000, 0b000000000001, COMPRESSIBLE); #undef INSN enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -940,9 +989,10 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN // Immediate Instruction -#define INSN(NAME, op, funct3) \ +#define INSN(NAME, op, funct3, C) \ void NAME(Register Rd, Register Rs1, int32_t imm) { \ guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs1, imm) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -952,12 +1002,16 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(addi, 0b0010011, 0b000); - INSN(slti, 0b0010011, 0b010); - INSN(addiw, 0b0011011, 0b000); - INSN(and_imm12, 0b0010011, 0b111); - INSN(ori, 0b0010011, 0b110); - INSN(xori, 0b0010011, 0b100); + INSN(addi, 0b0010011, 0b000, COMPRESSIBLE); + INSN(slti, 0b0010011, 0b010, NOT_COMPRESSIBLE); + INSN(addiw, 
0b0011011, 0b000, COMPRESSIBLE); + INSN(and_imm12, 0b0010011, 0b111, COMPRESSIBLE); + INSN(ori, 0b0010011, 0b110, NOT_COMPRESSIBLE); + INSN(xori, 0b0010011, 0b100, NOT_COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(addi_nc, 0b0010011, 0b000, NOT_COMPRESSIBLE); + INSN(addiw_nc, 0b0011011, 0b000, NOT_COMPRESSIBLE); #undef INSN @@ -978,9 +1032,10 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN // Shift Immediate Instruction -#define INSN(NAME, op, funct3, funct6) \ +#define INSN(NAME, op, funct3, funct6, C) \ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ guarantee(shamt <= 0x3f, "Shamt is invalid"); \ + EMIT_MAY_COMPRESS(C, NAME, Rd, Rs1, shamt) \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ patch((address)&insn, 14, 12, funct3); \ @@ -991,9 +1046,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(slli, 0b0010011, 0b001, 0b000000); - INSN(srai, 0b0010011, 0b101, 0b010000); - INSN(srli, 0b0010011, 0b101, 0b000000); + INSN(slli, 0b0010011, 0b001, 0b000000, COMPRESSIBLE); + INSN(srai, 0b0010011, 0b101, 0b010000, COMPRESSIBLE); + INSN(srli, 0b0010011, 0b101, 0b000000, COMPRESSIBLE); #undef INSN @@ -1018,8 +1073,9 @@ enum operand_size { int8, int16, int32, uint32, int64 }; #undef INSN // Upper Immediate Instruction -#define INSN(NAME, op) \ +#define INSN(NAME, op, C) \ void NAME(Register Rd, int32_t imm) { \ + EMIT_MAY_COMPRESS(C, NAME, Rd, imm) \ int32_t upperImm = imm >> 12; \ unsigned insn = 0; \ patch((address)&insn, 6, 0, op); \ @@ -1029,8 +1085,11 @@ enum operand_size { int8, int16, int32, uint32, int64 }; emit(insn); \ } - INSN(lui, 0b0110111); - INSN(auipc, 0b0010111); + INSN(lui, 0b0110111, COMPRESSIBLE); + INSN(auipc, 0b0010111, NOT_COMPRESSIBLE); + + // C-Ext: incompressible version + INSN(lui_nc, 0b0110111, NOT_COMPRESSIBLE); #undef INSN @@ -1913,6 +1972,4 @@ enum Nf { }; -class BiasedLockingCounters; - #endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv_cext.hpp b/src/hotspot/cpu/riscv/assembler_riscv_cext.hpp new file mode 100644 index 00000000000..79cf860a4c0 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv_cext.hpp @@ -0,0 +1,865 @@ +/* + * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2021, Alibaba Group Holding Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef CPU_RISCV_ASSEMBLER_RISCV_CEXT_HPP
+#define CPU_RISCV_ASSEMBLER_RISCV_CEXT_HPP
+
+  // C-Ext: If an instruction is compressible, the Assembler will implicitly emit
+  // the 16-bit compressed instruction instead of the 32-bit one. All of the logic
+  // below follows the chapter '"C" Standard Extension for Compressed Instructions,
+  // Version 2.0' of the RISC-V ISA specification. The smaller instruction size and
+  // the higher code density give both a code size reduction and a performance
+  // improvement.
+
+  // Note:
+  // 1. When UseRVC is enabled, compressible instructions are implicitly replaced
+  //    with their 16-bit versions.
+  // 2. C-Ext instructions in the Assembler always end with a '_c' suffix, such as
+  //    'li_c', but most of the time there is no need to use them explicitly.
+  //    (Although the spec says 'c.li', we use 'li_c' to unify related names - see below.)
+  // 3. In some cases we must force the uncompressed version of an instruction, for
+  //    instance when the code will be patched later and therefore has to keep its
+  //    longest, most general form, or when the code requires a fixed length.
+  //    For that we introduce the '_nc' suffix (short for: not compressible), which
+  //    forces an instruction to keep its normal 4-byte version.
+  //    An example:
+  //      j() (32-bit) may become j_c() (16-bit) with -XX:+UseRVC if it is compressible.
+  //      j_nc() forces it to keep its normal 4-byte version.
+  // 4. Running with -XX:PrintAssemblyOptions=no-aliases prints the C-Ext instructions
+  //    instead of the normal ones.
+  //
+
+  // C-Ext: incompressible version
+  void j_nc(const address &dest, Register temp = t0);
+  void j_nc(const Address &adr, Register temp = t0) ;
+  void j_nc(Label &l, Register temp = t0);
+  void jal_nc(Label &l, Register temp = t0);
+  void jal_nc(const address &dest, Register temp = t0);
+  void jal_nc(const Address &adr, Register temp = t0);
+  void jr_nc(Register Rs);
+  void jalr_nc(Register Rs);
+  void call_nc(const address &dest, Register temp = t0);
+  void tail_nc(const address &dest, Register temp = t0);
+
+  // C-Ext: extract a 16-bit instruction.
+  static inline uint16_t extract_c(uint16_t val, unsigned msb, unsigned lsb) {
+    assert_cond(msb >= lsb && msb <= 15);
+    unsigned nbits = msb - lsb + 1;
+    uint16_t mask = (1U << nbits) - 1;
+    uint16_t result = val >> lsb;
+    result &= mask;
+    return result;
+  }
+
+  static inline int16_t sextract_c(uint16_t val, unsigned msb, unsigned lsb) {
+    assert_cond(msb >= lsb && msb <= 15);
+    int16_t result = val << (15 - msb);
+    result >>= (15 - msb + lsb);
+    return result;
+  }
+
+  // C-Ext: patch a 16-bit instruction.
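+  // For illustration: li_c(x10, 5) ('c.li a0, 5') is assembled by patching op = 0b01
+  // into bits [1:0], imm[4:0] into bits [6:2], rd into bits [11:7], imm[5] into bit 12
+  // and funct3 = 0b010 into bits [15:13] of a zero-initialized 16-bit buffer via the
+  // helpers below, and then emitting the result with emit_int16().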
+ static void patch_c(address a, unsigned msb, unsigned lsb, uint16_t val) { + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 15); + unsigned nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + uint16_t mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + uint16_t target = *(uint16_t *)a; + target &= ~mask; + target |= val; + *(uint16_t *)a = target; + } + + static void patch_c(address a, unsigned bit, uint16_t val) { + patch_c(a, bit, bit, val); + } + + // C-Ext: patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) + static void patch_reg_c(address a, unsigned lsb, Register reg) { + patch_c(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // C-Ext: patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) + static void patch_compressed_reg_c(address a, unsigned lsb, Register reg) { + patch_c(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + + // C-Ext: patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) + static void patch_reg_c(address a, unsigned lsb, FloatRegister reg) { + patch_c(a, lsb + 4, lsb, reg->encoding_nocheck()); + } + + // C-Ext: patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) + static void patch_compressed_reg_c(address a, unsigned lsb, FloatRegister reg) { + patch_c(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); + } + +public: + +// C-Ext: Compressed Instructions + +// -------------- C-Ext Instruction Definitions -------------- + + void nop_c() { + addi_c(x0, 0); + } + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + patch_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(addi_c, 0b000, 0b01); + INSN(addiw_c, 0b001, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 10, 0)); \ + assert_cond((imm & 0b1111) == 0); \ + assert_cond(imm != 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ + patch_c((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ + patch_c((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ + patch_reg_c((address)&insn, 7, sp); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(addi16sp_c, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(uimm != 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rd); \ + patch_c((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ + patch_c((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + patch_c((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ + patch_c((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(addi4spn_c, 0b000, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs1, 
uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + patch_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(slli_c, 0b000, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, uint32_t shamt) { \ + assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ + assert_cond(shamt != 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ + patch_compressed_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 11, 10, funct2); \ + patch_c((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(srli_c, 0b100, 0b00, 0b01); + INSN(srai_c, 0b100, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, funct2, op) \ + void NAME(Register Rd_Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + patch_compressed_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 11, 10, funct2); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(andi_c, 0b100, 0b10, 0b01); + +#undef INSN + +#define INSN(NAME, funct6, funct2, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rs2); \ + patch_c((address)&insn, 6, 5, funct2); \ + patch_compressed_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 15, 10, funct6); \ + emit_int16(insn); \ + } + + INSN(sub_c, 0b100011, 0b00, 0b01); + INSN(xor_c, 0b100011, 0b01, 0b01); + INSN(or_c, 0b100011, 0b10, 0b01); + INSN(and_c, 0b100011, 0b11, 0b01); + INSN(subw_c, 0b100111, 0b00, 0b01); + INSN(addw_c, 0b100111, 0b01, 0b01); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rd_Rs1, Register Rs2) { \ + assert_cond(Rd_Rs1 != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, Rs2); \ + patch_reg_c((address)&insn, 7, Rd_Rs1); \ + patch_c((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(mv_c, 0b1000, 0b10); + INSN(add_c, 0b1001, 0b10); + +#undef INSN + +#define INSN(NAME, funct4, op) \ + void NAME(Register Rs1) { \ + assert_cond(Rs1 != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, x0); \ + patch_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 15, 12, funct4); \ + emit_int16(insn); \ + } + + INSN(jr_c, 0b1000, 0b10); + INSN(jalr_c, 0b1001, 0b10); + +#undef INSN + + typedef void (Assembler::* j_c_insn)(address dest); + typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); + + void wrap_label(Label &L, j_c_insn insn); + void wrap_label(Label &L, Register r, compare_and_branch_c_insn insn); + +#define INSN(NAME, funct3, op) \ + void NAME(int32_t offset) { \ + assert_cond(is_imm_in_range(offset, 11, 1)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); 
\ + patch_c((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ + patch_c((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ + patch_c((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ + patch_c((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ + patch_c((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ + patch_c((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ + patch_c((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 11, 1)); \ + j_c(distance); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } + + INSN(j_c, 0b101, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs1, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 8, 1)); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ + patch_c((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ + patch_compressed_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } \ + void NAME(Register Rs1, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = dest - pc(); \ + assert_cond(is_imm_in_range(distance, 8, 1)); \ + NAME(Rs1, distance); \ + } \ + void NAME(Register Rs1, Label &L) { \ + wrap_label(L, Rs1, &Assembler::NAME); \ + } + + INSN(beqz_c, 0b110, 0b01); + INSN(bnez_c, 0b111, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 18, 0)); \ + assert_cond((imm & 0xfff) == 0); \ + assert_cond(imm != 0); \ + assert_cond(Rd != x0 && Rd != x2); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(lui_c, 0b011, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, int32_t imm) { \ + assert_cond(is_imm_in_range(imm, 6, 0)); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 6, 2, (imm & right_n_bits(5))); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(li_c, 0b010, 0b01); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE, CHECK) \ + void NAME(REGISTER_TYPE Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + IF(CHECK, assert_cond(Rd != x0);) \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ + patch_c((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + +#define IF(BOOL, ...) 
IF_##BOOL(__VA_ARGS__) +#define IF_true(code) code +#define IF_false(code) + + INSN(ldsp_c, 0b011, 0b10, Register, true); + INSN(fldsp_c, 0b001, 0b10, FloatRegister, false); + +#undef IF_false +#undef IF_true +#undef IF +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rd_Rs2); \ + patch_c((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ + patch_compressed_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(ld_c, 0b011, 0b00, Register); + INSN(sd_c, 0b111, 0b00, Register); + INSN(fld_c, 0b001, 0b00, FloatRegister); + INSN(fsd_c, 0b101, 0b00, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op, REGISTER_TYPE) \ + void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ + assert_cond((uimm & 0b111) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, Rs2); \ + patch_c((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ + patch_c((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(sdsp_c, 0b111, 0b10, Register); + INSN(fsdsp_c, 0b101, 0b10, FloatRegister); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rs2, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_reg_c((address)&insn, 2, Rs2); \ + patch_c((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ + patch_c((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(swsp_c, 0b110, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + assert_cond(Rd != x0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_c((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ + patch_c((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ + patch_reg_c((address)&insn, 7, Rd); \ + patch_c((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(lwsp_c, 0b010, 0b10); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ + assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ + assert_cond((uimm & 0b11) == 0); \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + patch_compressed_reg_c((address)&insn, 2, Rd_Rs2); \ + patch_c((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ + patch_c((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ + patch_compressed_reg_c((address)&insn, 7, Rs1); \ + patch_c((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(lw_c, 0b010, 0b00); + INSN(sw_c, 0b110, 0b00); + +#undef INSN + +#define INSN(NAME, funct3, op) \ + void NAME() { \ + uint16_t insn = 0; \ + patch_c((address)&insn, 1, 0, op); \ + 
patch_c((address)&insn, 11, 2, 0x0); \ + patch_c((address)&insn, 12, 12, 0b1); \ + patch_c((address)&insn, 15, 13, funct3); \ + emit_int16(insn); \ + } + + INSN(ebreak_c, 0b100, 0b10); + +#undef INSN + +// -------------- C-Ext Transformation Macros -------------- + +// a pivotal dispatcher for C-Ext +#define EMIT_MAY_COMPRESS(COMPRESSIBLE, NAME, ...) EMIT_MAY_COMPRESS_##COMPRESSIBLE(NAME, __VA_ARGS__) +#define EMIT_MAY_COMPRESS_true(NAME, ...) EMIT_MAY_COMPRESS_##NAME(__VA_ARGS__) +#define EMIT_MAY_COMPRESS_false(NAME, ...) + +#define IS_COMPRESSIBLE(...) if (__VA_ARGS__) +#define CHECK_CEXT_AND_COMPRESSIBLE(...) IS_COMPRESSIBLE(UseRVC && __VA_ARGS__) +#define CHECK_CEXT() if (UseRVC) + +// C-Ext transformation macros +#define EMIT_RVC_cond(PREFIX, COND, EMIT) { \ + PREFIX \ + CHECK_CEXT_AND_COMPRESSIBLE(COND) { \ + EMIT; \ + return; \ + } \ + } + +#define EMIT_RVC_cond2(PREFIX, COND1, EMIT1, COND2, EMIT2) { \ + PREFIX \ + CHECK_CEXT() { \ + IS_COMPRESSIBLE(COND1) { \ + EMIT1; \ + return; \ + } else IS_COMPRESSIBLE(COND2) { \ + EMIT2; \ + return; \ + } \ + } \ + } + +#define EMIT_RVC_cond4(PREFIX, COND1, EMIT1, COND2, EMIT2, COND3, EMIT3, COND4, EMIT4) { \ + PREFIX \ + CHECK_CEXT() { \ + IS_COMPRESSIBLE(COND1) { \ + EMIT1; \ + return; \ + } else IS_COMPRESSIBLE(COND2) { \ + EMIT2; \ + return; \ + } else IS_COMPRESSIBLE(COND3) { \ + EMIT3; \ + return; \ + } else IS_COMPRESSIBLE(COND4) { \ + EMIT4; \ + return; \ + } \ + } \ + } + +// -------------------------- +// Register instructions +// -------------------------- +// add -> c.add +#define EMIT_MAY_COMPRESS_add(Rd, Rs1, Rs2) \ + EMIT_RVC_cond( \ + Register src = noreg;, \ + Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd)), \ + add_c(Rd, src) \ + ) + +// -------------------------- +// sub/subw -> c.sub/c.subw +#define EMIT_MAY_COMPRESS_sub_helper(NAME_C, Rd, Rs1, Rs2) \ + EMIT_RVC_cond(, \ + Rs1 == Rd && Rd->is_compressed_valid() && Rs2->is_compressed_valid(), \ + NAME_C(Rd, Rs2) \ + ) + +#define EMIT_MAY_COMPRESS_sub(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_sub_helper(sub_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_subw(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_sub_helper(subw_c, Rd, Rs1, Rs2) + +// -------------------------- +// xor/or/and/addw -> c.xor/c.or/c.and/c.addw +#define EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(NAME_C, Rd, Rs1, Rs2) \ + EMIT_RVC_cond( \ + Register src = noreg;, \ + Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ + ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd)), \ + NAME_C(Rd, src) \ + ) + +#define EMIT_MAY_COMPRESS_xorr(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(xor_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_orr(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(or_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_andr(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(and_c, Rd, Rs1, Rs2) + +#define EMIT_MAY_COMPRESS_addw(Rd, Rs1, Rs2) \ + EMIT_MAY_COMPRESS_xorr_orr_andr_addw_helper(addw_c, Rd, Rs1, Rs2) + +// -------------------------- +// Load/store register (all modes) +// -------------------------- +private: + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ + return rs1 == sp && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0 && \ + (!ld || rd_rs2 != x0); \ + } \ + + FUNC(is_ldsdsp_c, 0b111, 9); + FUNC(is_lwswsp_c, 0b011, 8); +#undef FUNC + +#define FUNC(NAME, funct3, bits) \ + bool NAME(Register rs1, int32_t imm12) { \ + return rs1 == sp 
&& \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_fldsdsp_c, 0b111, 9); +#undef FUNC + +#define FUNC(NAME, REG_TYPE, funct3, bits) \ + bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ + return rs1->is_compressed_valid() && \ + rd_rs2->is_compressed_valid() && \ + is_unsigned_imm_in_range(imm12, bits, 0) && \ + (intx(imm12) & funct3) == 0x0; \ + } \ + + FUNC(is_ldsd_c, Register, 0b111, 8); + FUNC(is_lwsw_c, Register, 0b011, 7); + FUNC(is_fldsd_c, FloatRegister, 0b111, 8); +#undef FUNC + +public: +// -------------------------- +// ld -> c.ldsp/c.ld +#define EMIT_MAY_COMPRESS_ld(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_ldsdsp_c(Rs, Rd, offset, true), \ + ldsp_c(Rd, offset), \ + is_ldsd_c(Rs, Rd, offset), \ + ld_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// sd -> c.sdsp/c.sd +#define EMIT_MAY_COMPRESS_sd(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_ldsdsp_c(Rs, Rd, offset, false), \ + sdsp_c(Rd, offset), \ + is_ldsd_c(Rs, Rd, offset), \ + sd_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// lw -> c.lwsp/c.lw +#define EMIT_MAY_COMPRESS_lw(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_lwswsp_c(Rs, Rd, offset, true), \ + lwsp_c(Rd, offset), \ + is_lwsw_c(Rs, Rd, offset), \ + lw_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// sw -> c.swsp/c.sw +#define EMIT_MAY_COMPRESS_sw(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_lwswsp_c(Rs, Rd, offset, false), \ + swsp_c(Rd, offset), \ + is_lwsw_c(Rs, Rd, offset), \ + sw_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// fld -> c.fldsp/c.fld +#define EMIT_MAY_COMPRESS_fld(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_fldsdsp_c(Rs, offset), \ + fldsp_c(Rd, offset), \ + is_fldsd_c(Rs, Rd, offset), \ + fld_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// fsd -> c.fsdsp/c.fsd +#define EMIT_MAY_COMPRESS_fsd(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + is_fldsdsp_c(Rs, offset), \ + fsdsp_c(Rd, offset), \ + is_fldsd_c(Rs, Rd, offset), \ + fsd_c(Rd, Rs, offset) \ + ) + +// -------------------------- +// Conditional branch instructions +// -------------------------- +// beq/bne -> c.beqz/c.bnez + +// TODO: Removing the below 'offset != 0' check needs us to fix lots of '__ beqz() / __ benz()' +// to '__ beqz_nc() / __ bnez_nc()' everywhere. +#define EMIT_MAY_COMPRESS_beqz_bnez_helper(NAME_C, Rs1, Rs2, offset) \ + EMIT_RVC_cond(, \ + offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ + is_imm_in_range(offset, 8, 1), \ + NAME_C(Rs1, offset) \ + ) + +#define EMIT_MAY_COMPRESS_beq(Rs1, Rs2, offset) \ + EMIT_MAY_COMPRESS_beqz_bnez_helper(beqz_c, Rs1, Rs2, offset) + +#define EMIT_MAY_COMPRESS_bne(Rs1, Rs2, offset) \ + EMIT_MAY_COMPRESS_beqz_bnez_helper(bnez_c, Rs1, Rs2, offset) + +// -------------------------- +// Unconditional branch instructions +// -------------------------- +// jalr/jal -> c.jr/c.jalr/c.j + +#define EMIT_MAY_COMPRESS_jalr(Rd, Rs, offset) \ + EMIT_RVC_cond2(, \ + offset == 0 && Rd == x1 && Rs != x0, \ + jalr_c(Rs), \ + offset == 0 && Rd == x0 && Rs != x0, \ + jr_c(Rs) \ + ) + +// TODO: Removing the 'offset != 0' check needs us to fix lots of '__ j()' +// to '__ j_nc()' manually everywhere. 
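+// For illustration: under UseRVC, jal(x0, offset) - the plain 'j' - with a non-zero
+// offset within +-2KiB is emitted as the 2-byte c.j; jal with Rd == x1 always keeps
+// its 4-byte encoding here, since RV64C does not provide c.jal.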
+#define EMIT_MAY_COMPRESS_jal(Rd, offset) \ + EMIT_RVC_cond(, \ + offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1), \ + j_c(offset) \ + ) + +// -------------------------- +// Upper Immediate Instruction +// -------------------------- +// lui -> c.lui +#define EMIT_MAY_COMPRESS_lui(Rd, imm) \ + EMIT_RVC_cond(, \ + Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0), \ + lui_c(Rd, imm) \ + ) + +// -------------------------- +// Miscellaneous Instructions +// -------------------------- +// ebreak -> c.ebreak +#define EMIT_MAY_COMPRESS_ebreak() \ + EMIT_RVC_cond(, \ + true, \ + ebreak_c() \ + ) + +// -------------------------- +// Immediate Instructions +// -------------------------- +// addi -> c.addi16sp/c.addi4spn/c.mv/c.addi/. An addi instruction able to transform to c.nop will be ignored. +#define EMIT_MAY_COMPRESS_addi(Rd, Rs1, imm) \ + EMIT_RVC_cond4(, \ + Rs1 == sp && Rd == Rs1 && imm != 0 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0), \ + addi16sp_c(imm), \ + Rs1 == sp && Rd->is_compressed_valid() && imm != 0 && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0), \ + addi4spn_c(Rd, imm), \ + Rd == Rs1 && is_imm_in_range(imm, 6, 0), \ + if (imm != 0) { addi_c(Rd, imm); }, \ + imm == 0 && Rd != x0 && Rs1 != x0, \ + mv_c(Rd, Rs1) \ + ) + +// -------------------------- +// addiw -> c.addiw +#define EMIT_MAY_COMPRESS_addiw(Rd, Rs1, imm) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0), \ + addiw_c(Rd, imm) \ + ) + +// -------------------------- +// and_imm12 -> c.andi +#define EMIT_MAY_COMPRESS_and_imm12(Rd, Rs1, imm) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0), \ + andi_c(Rd, imm) \ + ) + +// -------------------------- +// Shift Immediate Instructions +// -------------------------- +// slli -> c.slli +#define EMIT_MAY_COMPRESS_slli(Rd, Rs1, shamt) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd != x0 && shamt != 0, \ + slli_c(Rd, shamt) \ + ) + +// -------------------------- +// srai/srli -> c.srai/c.srli +#define EMIT_MAY_COMPRESS_srai_srli_helper(NAME_C, Rd, Rs1, shamt) \ + EMIT_RVC_cond(, \ + Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0, \ + NAME_C(Rd, shamt) \ + ) + +#define EMIT_MAY_COMPRESS_srai(Rd, Rs1, shamt) \ + EMIT_MAY_COMPRESS_srai_srli_helper(srai_c, Rd, Rs1, shamt) + +#define EMIT_MAY_COMPRESS_srli(Rd, Rs1, shamt) \ + EMIT_MAY_COMPRESS_srai_srli_helper(srli_c, Rd, Rs1, shamt) + +// -------------------------- + +// a compile time dispatcher +#define EMIT_MAY_COMPRESS_NAME_true(NAME, ARGS) NAME ARGS +#define EMIT_MAY_COMPRESS_NAME_false(NAME, ARGS) NAME##_nc ARGS +#define EMIT_MAY_COMPRESS_NAME(COMPRESSIBLE, NAME, ARGS) EMIT_MAY_COMPRESS_NAME_##COMPRESSIBLE(NAME, ARGS) + +// a runtime dispatcher (if clause is needed) +#define EMIT_MAY_COMPRESS_INST(COMPRESSIBLE, NAME, ARGS) \ + if (COMPRESSIBLE) { \ + EMIT_MAY_COMPRESS_NAME_true(NAME, ARGS); \ + } else { \ + EMIT_MAY_COMPRESS_NAME_false(NAME, ARGS); \ + } + +#endif // CPU_RISCV_ASSEMBLER_RISCV_CEXT_HPP \ No newline at end of file diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index 687381c0897..7b96e358517 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -44,7 +44,7 @@ void C1SafepointPollStub::emit_code(LIR_Assembler* ce) __ bind(_entry); InternalAddress safepoint_pc(__ pc() - __ offset() + safepoint_offset()); __ code_section()->relocate(__ pc(), safepoint_pc.rspec()); - __ la(t0, 
safepoint_pc.target()); + __ la(t0, safepoint_pc.target(), NOT_COMPRESSIBLE); __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset())); assert(SharedRuntime::polling_page_return_handler_blob() != NULL, @@ -106,9 +106,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) __ mv(t1, _array->as_pointer_register()); stub_id = Runtime1::throw_range_check_failed_id; } - int32_t off = 0; - __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off); - __ jalr(ra, ra, off); + __ jalr_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), ra); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); @@ -257,7 +255,7 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); } -int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; +int PatchingStub::_patch_info_offset = -NativeGeneralJump::get_instruction_size(); void PatchingStub::align_patch_site(MacroAssembler* masm) {} diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index 14ec4a5f995..f3d132889dc 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -1318,7 +1318,12 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op } } -void LIR_Assembler::align_call(LIR_Code code) { } +void LIR_Assembler::align_call(LIR_Code code) { + // C-Ext: With C-Ext a call may get 2-byte aligned. + // the address of jal itself (which will be patched later) should not span the cache line. + // See CallDynamicJavaDirectNode::compute_padding() for more info. + __ align(4); +} void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -1375,9 +1380,7 @@ void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmit } int pc_for_athrow_offset = __ offset(); InternalAddress pc_for_athrow(__ pc()); - int32_t off = 0; - __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off); - __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off); + __ addi_patchable(exceptionPC->as_register(), pc_for_athrow, exceptionPC->as_register()); add_call_info(pc_for_athrow_offset, info); // for exception handler __ verify_not_null_oop(x10); @@ -1801,9 +1804,7 @@ void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* arg if (cb != NULL) { __ far_call(RuntimeAddress(dest)); } else { - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(dest), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(dest), t0); } if (info != NULL) { diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 02c54ee959c..0ef83964d48 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -325,7 +325,7 @@ void C1_MacroAssembler::verified_entry() { // must ensure that this first instruction is a J, JAL or NOP. // Make it a NOP. 
- nop(); + nop_nc(); } void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp index b3b405f3040..ff4fd393e2e 100644 --- a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp @@ -67,9 +67,7 @@ int StubAssembler::call_RT(Register oop_result, Register metadata_result, addres set_last_Java_frame(sp, fp, retaddr, t0); // do the call - int32_t off = 0; - la_patchable(t0, RuntimeAddress(entry), off); - jalr(x1, t0, off); + jalr_patchable(x1, RuntimeAddress(entry), t0); bind(retaddr); int call_offset = offset(); // verify callee-saved register @@ -569,9 +567,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { Label retaddr; __ set_last_Java_frame(sp, fp, retaddr, t0); // do the call - int32_t off = 0; - __ la_patchable(t0, RuntimeAddress(target), off); - __ jalr(x1, t0, off); + __ jalr_patchable(x1, RuntimeAddress(target), t0); __ bind(retaddr); OopMapSet* oop_maps = new OopMapSet(); assert_cond(oop_maps != NULL); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index ff030372712..203556b2644 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -1196,21 +1196,21 @@ typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, static conditional_branch_insn conditional_branches[] = { /* SHORT branches */ - (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::beq_nc, (conditional_branch_insn)&Assembler::bgt, NULL, // BoolTest::overflow (conditional_branch_insn)&Assembler::blt, - (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bne_nc, (conditional_branch_insn)&Assembler::ble, NULL, // BoolTest::no_overflow (conditional_branch_insn)&Assembler::bge, /* UNSIGNED branches */ - (conditional_branch_insn)&Assembler::beq, + (conditional_branch_insn)&Assembler::beq_nc, (conditional_branch_insn)&Assembler::bgtu, NULL, (conditional_branch_insn)&Assembler::bltu, - (conditional_branch_insn)&Assembler::bne, + (conditional_branch_insn)&Assembler::bne_nc, (conditional_branch_insn)&Assembler::bleu, NULL, (conditional_branch_insn)&Assembler::bgeu @@ -1259,11 +1259,11 @@ void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, switch (cmpFlag) { case BoolTest::eq: case BoolTest::le: - beqz(op1, L, is_far); + beqz_nc(op1, L, is_far); break; case BoolTest::ne: case BoolTest::gt: - bnez(op1, L, is_far); + bnez_nc(op1, L, is_far); break; default: ShouldNotReachHere(); @@ -1273,10 +1273,10 @@ void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, void C2_MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { switch (cmpFlag) { case BoolTest::eq: - beqz(op1, L, is_far); + beqz_nc(op1, L, is_far); break; case BoolTest::ne: - bnez(op1, L, is_far); + bnez_nc(op1, L, is_far); break; default: ShouldNotReachHere(); diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index ef854dc2fce..fd376d32824 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -82,6 +82,16 @@ static const int double_branch_mask = 1 << bool_test_bits; // cmp + // C-Ext: these cmp functions remain uncompressed in C2 MachNodes' emission - + // 
for the reason described in MachEpilogNode::emit() and PhaseOutput::scratch_emit_size():
+  //   scratch_emit_size() simulates a node's size, but for MachBranchNodes it emits a fake
+  //   Label right next to the node itself - the offset is so small that in the scratch
+  //   emission phase the branch always gets compressed by our implicit compression, while
+  //   in the real code the Label may be anywhere, so the branch may stay uncompressed.
+  //   That is the mismatch: C2 runs shorten_branches(), but with C-Ext we would also need
+  //   a further pass, say shorten_compressed_branches(). After researching we found little
+  //   performance gain from compressing MachBranchNodes, while the cost of supporting their
+  //   compression is considerable, so as a solution we simply disable it for MachBranchNodes.
   void cmp_branch(int cmpFlag,
                   Register op1, Register op2,
                   Label& label, bool is_far = false);
diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
index 0b01a47bd5c..4a86b964c2f 100644
--- a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp
@@ -46,7 +46,7 @@ define_pd_global(intx, OnStackReplacePercentage, 140);
 define_pd_global(intx, ConditionalMoveLimit, 0);
 define_pd_global(intx, FreqInlineSize, 325);
 define_pd_global(intx, MinJumpTableSize, 10);
-define_pd_global(intx, InteriorEntryAlignment, 16);
+define_pd_global(intx, InteriorEntryAlignment, 4);
 define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
 define_pd_global(intx, LoopUnrollLimit, 60);
 define_pd_global(intx, LoopPercentProfileLimit, 10);
diff --git a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
index 8956e4b7941..6469f6fe6cf 100644
--- a/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_safepointPollStubTable_riscv.cpp
@@ -41,7 +41,7 @@ void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointP
   __ bind(entry->_stub_label);
   InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
   masm.code_section()->relocate(masm.pc(), safepoint_pc.rspec());
-  __ la(t0, safepoint_pc.target());
+  __ la(t0, safepoint_pc.target(), NOT_COMPRESSIBLE);
   __ sd(t0, Address(xthread, JavaThread::saved_exception_pc_offset()));
   __ far_jump(callback_addr);
 }
diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
index 75bc4be7840..72215c14071 100644
--- a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
+++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -69,8 +69,12 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark)
 #undef __
 
 int CompiledStaticCall::to_interp_stub_size() {
-  // fence_i + fence* + (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr
-  return NativeFenceI::instruction_size() + 12 * NativeInstruction::instruction_size;
+  // fence_i + fence* + (lui, addi, slli(C), addi, slli(C), addi) + (lui, addi, slli(C), addi, slli(C)) + jalr
+  return NativeFenceI::instruction_size() +
+         (!UseRVC ?
+ 12 * NativeInstruction::instruction_size : + 8 * NativeInstruction::instruction_size + 4 * NativeInstruction::compressed_instruction_size + ); } int CompiledStaticCall::to_trampoline_stub_size() { diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp index f23ff34e3f4..7049b720d3e 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp @@ -180,20 +180,12 @@ void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, // Get the current end of the heap ExternalAddress address_end((address) Universe::heap()->end_addr()); - { - int32_t offset; - __ la_patchable(t1, address_end, offset); - __ ld(t1, Address(t1, offset)); - } + __ ld_patchable(t1, address_end, t1); // Get the current top of the heap ExternalAddress address_top((address) Universe::heap()->top_addr()); - { - int32_t offset; - __ la_patchable(t0, address_top, offset); - __ addi(t0, t0, offset); - __ lr_d(obj, t0, Assembler::aqrl); - } + __ addi_patchable(t0, address_top, t0); + __ lr_d(obj, t0, Assembler::aqrl); // Adjust it my the size of our new object if (var_size_in_bytes == noreg) { @@ -231,6 +223,8 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); } +extern int nmethod_barrier_guard_offset(); + void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); @@ -238,6 +232,12 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { return; } + // C-Ext: With C-Ext we may come here with a 2-byte alignment, hence an alignment is needed. + // See below comments about amo, also native_nmethod_barrier() to find the entry's calculation strategy. + while ((__ offset() + nmethod_barrier_guard_offset()) % 4 != 0) { __ nop(); } + + int start = __ offset(); + Label skip, guard; Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset())); @@ -250,10 +250,14 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { __ beq(t0, t1, skip); int32_t offset = 0; - __ movptr_with_offset(t0, StubRoutines::riscv64::method_entry_barrier(), offset); - __ jalr(ra, t0, offset); + __ movptr_with_offset(t0, StubRoutines::riscv64::method_entry_barrier(), offset, NOT_COMPRESSIBLE); + __ jalr_nc(ra, t0, offset); __ j(skip); + // RISCV's amoswap instructions need an alignment for the memory address it swaps + // C-Ext: So with C-Ext we need to manually align it to 4-byte + assert(__ offset() - start == nmethod_barrier_guard_offset() && __ offset() % 4 == 0, "offsets equality and alignment"); + __ bind(guard); __ emit_int32(0); // nmethod guard value. Skipped over in common case. 
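For reference, a minimal stand-alone sketch of the guard-offset arithmetic the two files above rely on; the constants mirror NativeInstruction::instruction_size (4) and compressed_instruction_size (2) introduced in this patch, while the helper name and the main() harness are illustrative only:

#include <cassert>

// 4-byte RVI instruction vs. 2-byte RVC instruction, as defined in nativeInst_riscv.hpp.
constexpr int kInsnSize  = 4;
constexpr int kCInsnSize = 2;

// Barrier body: auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j.
// With RVC the two slli are emitted as c.slli, so the guard word sits 4 bytes closer.
constexpr int guard_offset(bool use_rvc) {
  return use_rvc ? 10 * kInsnSize + 2 * kCInsnSize   // 44 bytes
                 : 12 * kInsnSize;                   // 48 bytes
}

int main() {
  // nmethod_entry_barrier() pads with nops until (offset + guard_offset) % 4 == 0,
  // which keeps the 32-bit guard word 4-byte aligned for the amoswap-based disarming.
  assert(guard_offset(false) == 48);
  assert(guard_offset(true)  == 44);
  return 0;
}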
diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp index ae7ee4c5a44..4f276ef633d 100644 --- a/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetNMethod_riscv.cpp @@ -36,11 +36,21 @@ #include "utilities/debug.hpp" class NativeNMethodBarrier: public NativeInstruction { +public: + enum { + total_normal_guard_offset = 12 * instruction_size, + total_compressed_guard_offset = 10 * instruction_size + 2 * compressed_instruction_size, + + total_normal_size = total_normal_guard_offset + 4, + total_compressed_size = total_compressed_guard_offset + 4, + }; + +private: address instruction_address() const { return addr_at(0); } int *guard_addr() { - /* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */ - return reinterpret_cast(instruction_address() + 12 * 4); + /* auipc + lwu + fence + lwu + beq + lui + addi + (C)slli + addi + (C)slli + jalr + j */ + return reinterpret_cast(instruction_address() + guard_offset()); } public: @@ -53,28 +63,55 @@ class NativeNMethodBarrier: public NativeInstruction { } void verify() const; + + static int guard_offset() { + return UseRVC ? total_compressed_guard_offset : total_normal_guard_offset; + } }; +int nmethod_barrier_guard_offset() { + return NativeNMethodBarrier::guard_offset(); +} + // Store the instruction bitmask, bits and name for checking the barrier. struct CheckInsn { uint32_t mask; uint32_t bits; const char *name; + int instruction_size; }; static const struct CheckInsn barrierInsn[] = { - { 0x00000fff, 0x00000297, "auipc t0, 0 "}, - { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "}, - { 0xffffffff, 0x0aa0000f, "fence ir, ir "}, - { 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"}, - { 0x01fff07f, 0x00628063, "beq t0, t1, skip "}, - { 0x00000fff, 0x000002b7, "lui t0, imm0 "}, - { 0x000fffff, 0x00028293, "addi t0, t0, imm1 "}, - { 0xffffffff, 0x00b29293, "slli t0, t0, 11 "}, - { 0x000fffff, 0x00028293, "addi t0, t0, imm2 "}, - { 0xffffffff, 0x00529293, "slli t0, t0, 5 "}, - { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "}, - { 0x00000fff, 0x0000006f, "j skip "} + { 0x00000fff, 0x00000297, "auipc t0, 0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x0002e283, "lwu t0, 48(t0) ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x0aa0000f, "fence ir, ir ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x000be303, "lwu t1, 36(xthread) ", NativeInstruction::instruction_size}, + { 0x01fff07f, 0x00628063, "beq t0, t1, skip ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x000002b7, "lui t0, imm0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm1 ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x00b29293, "slli t0, t0, 11 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm2 ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x00529293, "slli t0, t0, 5 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x0000006f, "j skip ", NativeInstruction::instruction_size} + /* guard: */ + /* 32bit nmethod guard value */ + /* skip: */ +}; + +static const struct CheckInsn barrierCInsn[] = { + { 0x00000fff, 0x00000297, "auipc t0, 0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x0002e283, "lwu t0, 44(t0) ", NativeInstruction::instruction_size}, + { 0xffffffff, 0x0aa0000f, "fence ir, ir ", 
NativeInstruction::instruction_size}, + { 0x000fffff, 0x000be303, "lwu t1, 36(xthread) ", NativeInstruction::instruction_size}, + { 0x01fff07f, 0x00628063, "beq t0, t1, skip ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x000002b7, "lui t0, imm0 ", NativeInstruction::instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm1 ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x02ae, "c.slli t0, t0, 11 ", NativeInstruction::compressed_instruction_size}, + { 0x000fffff, 0x00028293, "addi t0, t0, imm2 ", NativeInstruction::instruction_size}, + { 0x0000ffff, 0x0296, "c.slli t0, t0, 5 ", NativeInstruction::compressed_instruction_size}, + { 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) ", NativeInstruction::instruction_size}, + { 0x00000fff, 0x0000006f, "j skip ", NativeInstruction::instruction_size} /* guard: */ /* 32bit nmethod guard value */ /* skip: */ @@ -85,13 +122,22 @@ static const struct CheckInsn barrierInsn[] = { // register numbers and immediate values in the encoding. void NativeNMethodBarrier::verify() const { intptr_t addr = (intptr_t) instruction_address(); - for(unsigned int i = 0; i < sizeof(barrierInsn)/sizeof(struct CheckInsn); i++ ) { - uint32_t inst = *((uint32_t*) addr); - if ((inst & barrierInsn[i].mask) != barrierInsn[i].bits) { + const struct CheckInsn *insns; + size_t size; + if (!UseRVC) { + insns = barrierInsn; + size = sizeof(barrierInsn) / sizeof(struct CheckInsn); + } else { + insns = barrierCInsn; + size = sizeof(barrierCInsn) / sizeof(struct CheckInsn); + } + for(unsigned int i = 0; i < size; i++ ) { + uint32_t inst = insns[i].instruction_size == NativeInstruction::compressed_instruction_size ? *((uint16_t*) addr) : *((uint32_t*) addr); + if ((inst & insns[i].mask) != insns[i].bits) { tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", addr, inst); - fatal("not an %s instruction.", barrierInsn[i].name); + fatal("not an %s instruction.", insns[i].name); } - addr += 4; + addr += insns[i].instruction_size; } } @@ -141,10 +187,15 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { // see BarrierSetAssembler::nmethod_entry_barrier // auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32 -static const int entry_barrier_offset = -4 * 13; +static const int entry_barrier_normal_offset = -NativeNMethodBarrier::total_normal_size; +static const int entry_barrier_compressed_offset = -NativeNMethodBarrier::total_compressed_size; + +static const int entry_barrier_offset() { + return !UseRVC ? 
entry_barrier_normal_offset : entry_barrier_compressed_offset; +} static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) { - address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset; + address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(); NativeNMethodBarrier* barrier = reinterpret_cast(barrier_address); debug_only(barrier->verify()); return barrier; diff --git a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp index a1d4be63f61..4ce8013644f 100644 --- a/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/z/zBarrierSetAssembler_riscv.cpp @@ -336,9 +336,7 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z { ZSaveLiveRegisters save_live_registers(masm, stub); ZSetupArguments setup_arguments(masm, stub); - int32_t offset = 0; - __ la_patchable(t0, stub->slow_path(), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, stub->slow_path(), t0); } // Stub exit diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp index 0f48c46b409..b0896aea0a6 100644 --- a/src/hotspot/cpu/riscv/globals_riscv.hpp +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp @@ -37,7 +37,7 @@ define_pd_global(bool, TrapBasedNullChecks, false); define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast define_pd_global(uintx, CodeCacheSegmentSize, 64 COMPILER1_AND_COMPILER2_PRESENT(+64)); // Tiered compilation has large code-entry alignment. -define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, CodeEntryAlignment, 16); define_pd_global(intx, OptoLoopAlignment, 16); #define DEFAULT_STACK_YELLOW_PAGES (2) @@ -90,6 +90,7 @@ define_pd_global(intx, InlineSmallCode, 1000); "Extend fence.i to fence.i + fence.") \ product(bool, AvoidUnalignedAccesses, true, \ "Avoid generating unaligned memory accesses") \ - product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") + product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \ + product(bool, UseRVC, true, EXPERIMENTAL, "Use RVC instructions") \ #endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index 549d56eb94e..88a6961b2e3 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -182,9 +182,7 @@ void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, i } void InterpreterMacroAssembler::get_dispatch() { - int32_t offset = 0; - la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); - addi(xdispatch, xdispatch, offset); + addi_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), xdispatch); } void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp index 814ed23e471..55b186f55d6 100644 --- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp @@ -74,9 +74,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { address fast_entry = __ pc(); Label slow; - int32_t offset = 0; - __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); - __ addi(rcounter_addr, rcounter_addr, offset); + __ 
addi_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), rcounter_addr); Address safepoint_counter_addr(rcounter_addr, 0); __ lwu(rcounter, safepoint_counter_addr); @@ -169,9 +167,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { { __ enter(); - int32_t tmp_offset = 0; - __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); - __ jalr(x1, t0, tmp_offset); + __ jalr_patchable(x1, ExternalAddress(slow_case_addr), t0); __ leave(); __ ret(); } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index c9dcae99747..16cfc7cd23b 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -225,10 +225,11 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, - Register temp) { + Register temp, + bool compressible) { assert(last_java_pc != NULL, "must provide a valid PC"); - la(temp, last_java_pc); + la(temp, last_java_pc, compressible); sd(temp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); set_last_Java_frame(last_java_sp, last_java_fp, noreg, temp); @@ -243,7 +244,7 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp, } else { InstructionMark im(this); L.add_patch_at(code(), locator()); - set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp, NOT_COMPRESSIBLE); } } @@ -308,9 +309,7 @@ void MacroAssembler::call_VM_base(Register oop_result, ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); Label ok; beqz(t0, ok); - int32_t offset = 0; - la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); - jalr(x0, t0, offset); + jalr_patchable(x0, RuntimeAddress(StubRoutines::forward_exception_entry()), t0); bind(ok); } @@ -384,9 +383,7 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { } // call indirectly to solve generation ordering problem - int32_t offset = 0; - la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); - ld(t1, Address(t1, offset)); + ld_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), t1); jalr(t1); pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); @@ -423,9 +420,7 @@ void MacroAssembler::verify_oop_addr(Address addr, const char* s) { } // call indirectly to solve generation ordering problem - int32_t offset = 0; - la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); - ld(t1, Address(t1, offset)); + ld_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), t1); jalr(t1); pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); @@ -535,12 +530,18 @@ void MacroAssembler::resolve_jobject(Register value, Register thread, Register t bind(done); } -void MacroAssembler::stop(const char* msg) { +// C-Ext: we may need to disable the compression for some instructions +// in some Nodes during C2 code emission, to emit the same constant +// instruction size both in PhaseOutput::scratch_emit_size() +// and the final real code emission. +// See: MachEpilogNode::emit() for more details. 
+void MacroAssembler::stop(const char* msg, bool compressible) { address ip = pc(); pusha(); if(msg != NULL && ip != NULL) { li(c_rarg0, (uintptr_t)(address)msg); - li(c_rarg1, (uintptr_t)(address)ip); + // C-Ext: use a fixed-length movptr + movptr(c_rarg1, (address)ip, compressible); } else { ShouldNotReachHere(); } @@ -571,8 +572,8 @@ void MacroAssembler::emit_static_call_stub() { // Jump to the entry point of the i2c stub. int32_t offset = 0; - movptr_with_offset(t0, 0, offset); - jalr(x0, t0, offset); + movptr_with_offset(t0, 0, offset, NOT_COMPRESSIBLE); + jalr_nc(x0, t0, offset); } void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments, @@ -658,7 +659,15 @@ void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Reg } void MacroAssembler::nop() { - addi(x0, x0, 0); + if (UseRVC) { + nop_c(); + } else { + addi(x0, x0, 0); + } +} + +void MacroAssembler::nop_nc() { + addi_nc(x0, x0, 0); } void MacroAssembler::mv(Register Rd, Register Rs) { @@ -739,13 +748,13 @@ void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { vfsgnjn_vv(vd, vs, vs); } -void MacroAssembler::la(Register Rd, const address &dest) { +void MacroAssembler::la(Register Rd, const address &dest, bool compressible) { int64_t offset = dest - pc(); if (is_offset_in_range(offset, 32)) { auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit - addi(Rd, Rd, ((int64_t)offset << 52) >> 52); + EMIT_MAY_COMPRESS_INST(compressible, addi, (Rd, Rd, ((int64_t)offset << 52) >> 52)); } else { - movptr(Rd, dest); + movptr(Rd, dest, compressible); } } @@ -759,7 +768,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { if (rtype == relocInfo::none) { li(Rd, (intptr_t)(adr.target())); } else { - movptr(Rd, adr.target()); + movptr(Rd, adr.target(), NOT_COMPRESSIBLE); } break; } @@ -775,7 +784,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) { } void MacroAssembler::la(Register Rd, Label &label) { - la(Rd, target(label)); + la(Rd, target(label), NOT_COMPRESSIBLE); } #define INSN(NAME) \ @@ -795,6 +804,19 @@ void MacroAssembler::la(Register Rd, Label &label) { #undef INSN +#define INSN(NAME) \ + void MacroAssembler::NAME##z_nc(Register Rs, const address &dest) { \ + NAME##_nc(Rs, zr, dest); \ + } \ + void MacroAssembler::NAME##z_nc(Register Rs, Label &l, bool is_far) { \ + NAME##_nc(Rs, zr, l, is_far); \ + } \ + + INSN(beq); + INSN(bne); + +#undef INSN + // Float compare branch instructions #define INSN(NAME, FLOATCMP, BRANCH) \ @@ -807,8 +829,9 @@ void MacroAssembler::la(Register Rd, Label &label) { BRANCH(t0, l, is_far); \ } - INSN(beq, feq, bnez); - INSN(bne, feq, beqz); + INSN(beq, feq, bnez_nc); + INSN(bne, feq, beqz_nc); + #undef INSN @@ -818,11 +841,11 @@ void MacroAssembler::la(Register Rd, Label &label) { if(is_unordered) { \ /* jump if either source is NaN or condition is expected */ \ FLOATCMP2##_s(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ + beqz_nc(t0, l, is_far); \ } else { \ /* jump if no NaN in source and condition is expected */ \ FLOATCMP1##_s(t0, Rs1, Rs2); \ - bnez(t0, l, is_far); \ + bnez_nc(t0, l, is_far); \ } \ } \ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ @@ -830,11 +853,11 @@ void MacroAssembler::la(Register Rd, Label &label) { if(is_unordered) { \ /* jump if either source is NaN or condition is expected */ \ FLOATCMP2##_d(t0, Rs2, Rs1); \ - beqz(t0, l, is_far); \ + beqz_nc(t0, l, is_far); \ } else { \ /* jump if no NaN in source and condition is expected */ \ FLOATCMP1##_d(t0, 
Rs1, Rs2);                                                     \
-      bnez(t0, l, is_far);                                                             \
+      bnez_nc(t0, l, is_far);                                                          \
     }                                                                                  \
   }
 
@@ -1186,21 +1209,46 @@ void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_byte
 }
 
 static int patch_offset_in_jal(address branch, int64_t offset) {
-  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n");
-  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
-  Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff);                      // offset[10:1]  ==> branch[30:21]
-  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
-  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
-  return NativeInstruction::instruction_size;                                   // only one instruction
+  if (!NativeInstruction::is_compressed_instr(branch)) {
+    assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
+    Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                      // offset[20]    ==> branch[31]
+    Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
+    Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                      // offset[11]    ==> branch[20]
+    Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                     // offset[19:12] ==> branch[19:12]
+    return NativeInstruction::instruction_size;                                  // only one instruction
+  } else { // no need to check again that this is a compressed instruction - it must be, since we have to patch it.
+    assert(is_imm_in_range(offset, 11, 1), "offset is too large to be patched in one c.j instruction: use j_nc() instead of j().\n");
+    Assembler::patch_c(branch, 2, 2, (offset & nth_bit(5)) >> 5);                // offset[5]    ==> branch[2]
+    Assembler::patch_c(branch, 5, 3, (offset & right_n_bits(4)) >> 1);           // offset[3:1]  ==> branch[5:3]
+    Assembler::patch_c(branch, 6, 6, (offset & nth_bit(7)) >> 7);                // offset[7]    ==> branch[6]
+    Assembler::patch_c(branch, 7, 7, (offset & nth_bit(6)) >> 6);                // offset[6]    ==> branch[7]
+    Assembler::patch_c(branch, 8, 8, (offset & nth_bit(10)) >> 10);              // offset[10]   ==> branch[8]
+    Assembler::patch_c(branch, 10, 9, (offset & right_n_bits(10)) >> 8);         // offset[9:8]  ==> branch[10:9]
+    Assembler::patch_c(branch, 11, 11, (offset & nth_bit(4)) >> 4);              // offset[4]    ==> branch[11]
+    Assembler::patch_c(branch, 12, 12, (offset & nth_bit(11)) >> 11);            // offset[11]   ==> branch[12]
+    return NativeInstruction::compressed_instruction_size;                       // only one instruction
+  }
 }
 
 static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
-  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n");
-  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
-  Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f);                       // offset[10:5]  ==> branch[30:25]
-  Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1);                         // offset[11]    ==> branch[7]
-  Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf);                         // offset[4:1]   ==> branch[11:8]
-  return NativeInstruction::instruction_size;                                   // only one instruction
+  if (!NativeInstruction::is_compressed_instr(branch)) {
+    assert(is_imm_in_range(offset, 12, 1),
+           "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
+    Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                      // offset[12]    ==> branch[31]
+    Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f);                      // offset[10:5]  ==> branch[30:25]
+    Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1);                        // offset[11]    ==> branch[7]
+ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] + return NativeInstruction::instruction_size; // only one instruction + } else { + assert(is_imm_in_range(offset, 8, 1), + "offset is too large to be patched in one c.beqz/c.bnez instruction: use beqz_nc()/bnez.nc() instead.\n"); + Assembler::patch_c(branch, 2, 2, (offset & nth_bit(5)) >> 5); + Assembler::patch_c(branch, 4, 3, (offset & right_n_bits(3)) >> 1); + Assembler::patch_c(branch, 6, 5, (offset & right_n_bits(8)) >> 6); + Assembler::patch_c(branch, 11, 10, (offset & right_n_bits(5)) >> 3); + Assembler::patch_c(branch, 12, 12, (offset & nth_bit(8)) >> 8); + return NativeInstruction::compressed_instruction_size; // only one instruction + } } static int patch_offset_in_pc_relative(address branch, int64_t offset) { @@ -1211,18 +1259,24 @@ static int patch_offset_in_pc_relative(address branch, int64_t offset) { } static int patch_addr_in_movptr(address branch, address target) { - const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load + // lui + addi + slli(C) + addi + slli(C) + addi/jalr/load + const int size = !UseRVC ? + 6 * NativeInstruction::instruction_size : + 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size; int32_t lower = ((intptr_t)target << 36) >> 36; int64_t upper = ((intptr_t)target - lower) >> 28; Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[47:28] + target[27] ==> branch[31:12] Assembler::patch(branch + 4, 31, 20, (lower >> 16) & 0xfff); // Addi. target[27:16] ==> branch[31:20] - Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] - Assembler::patch(branch + 20, 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] - return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + Assembler::patch(branch + (!UseRVC ? 12 : 10), 31, 20, (lower >> 5) & 0x7ff); // Addi. target[15: 5] ==> branch[31:20] + Assembler::patch(branch + (!UseRVC ? 20 : 16), 31, 20, lower & 0x1f); // Addi/Jalr/Load. target[ 4: 0] ==> branch[31:20] + return size; } static int patch_imm_in_li64(address branch, address target) { - const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi + // lui + addi + slli(C) + addi + slli(C) + addi + slli(C) + addi + const int size = !UseRVC ? + 8 * NativeInstruction::instruction_size : + 5 * NativeInstruction::instruction_size + 3 * NativeInstruction::compressed_instruction_size; int64_t lower = (intptr_t)target & 0xffffffff; lower = lower - ((lower << 44) >> 44); int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; @@ -1236,10 +1290,10 @@ static int patch_imm_in_li64(address branch, address target) { Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. // Load the rest 32 bits. - Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. - Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. - Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. - return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; + Assembler::patch(branch + (!UseRVC ? 12 : 10), 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. + Assembler::patch(branch + (!UseRVC ? 20 : 16), 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. + Assembler::patch(branch + (!UseRVC ? 
28 : 22), 31, 20, (intptr_t)target & 0xff); // Addi. + return size; } static int patch_imm_in_li32(address branch, int32_t target) { @@ -1253,10 +1307,8 @@ static int patch_imm_in_li32(address branch, int32_t target) { return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; } -static long get_offset_of_jal(address insn_addr) { - assert_cond(insn_addr != NULL); +static long get_offset_of_jal(unsigned insn) { long offset = 0; - unsigned insn = *(unsigned*)insn_addr; long val = (long)Assembler::sextract(insn, 31, 12); offset |= ((val >> 19) & 0x1) << 20; offset |= (val & 0xff) << 12; @@ -1266,14 +1318,12 @@ static long get_offset_of_jal(address insn_addr) { return offset; } -static long get_offset_of_conditional_branch(address insn_addr) { +static long get_offset_of_conditional_branch(unsigned insn) { long offset = 0; - assert_cond(insn_addr != NULL); - unsigned insn = *(unsigned*)insn_addr; offset = (long)Assembler::sextract(insn, 31, 31); - offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); - offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); - offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); + offset = (offset << 12) | ((Assembler::sextract(insn, 7, 7) & 0x1) << 11); + offset = offset | ((Assembler::sextract(insn, 30, 25) & 0x3f) << 5); + offset = offset | ((Assembler::sextract(insn, 11, 8) & 0xf) << 1); offset = (offset << 41) >> 41; return offset; } @@ -1290,9 +1340,9 @@ static long get_offset_of_pc_relative(address insn_addr) { static address get_target_of_movptr(address insn_addr) { assert_cond(insn_addr != NULL); intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28; // Lui. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. + target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 2)))[3], 31, 20)) << 5; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 4)))[5], 31, 20)); // Addi/Jalr/Load. return (address) target_address; } @@ -1300,9 +1350,9 @@ static address get_target_of_li64(address insn_addr) { assert_cond(insn_addr != NULL); intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. - target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 2)))[3], 31, 20)) << 20; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 4)))[5], 31, 20)) << 8; // Addi. + target_address += ((int64_t)Assembler::sextract(((unsigned*)(insn_addr - (!UseRVC ? 0 : 6)))[7], 31, 20)); // Addi. 
return (address)target_address; } @@ -1342,9 +1392,9 @@ address MacroAssembler::target_addr_for_insn(address insn_addr) { long offset = 0; assert_cond(insn_addr != NULL); if (NativeInstruction::is_jal_at(insn_addr)) { // jal - offset = get_offset_of_jal(insn_addr); + offset = get_offset_of_jal(*(unsigned*)insn_addr); } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne - offset = get_offset_of_conditional_branch(insn_addr); + offset = get_offset_of_conditional_branch(*(unsigned*)insn_addr); } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load offset = get_offset_of_pc_relative(insn_addr); } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr @@ -1381,8 +1431,7 @@ void MacroAssembler::reinit_heapbase() { mv(xheapbase, CompressedOops::ptrs_base()); } else { int32_t offset = 0; - la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset); - ld(xheapbase, Address(xheapbase, offset)); + ld_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), xheapbase); } } } @@ -1402,7 +1451,7 @@ void MacroAssembler::mvw(Register Rd, int32_t imm32) { void MacroAssembler::mv(Register Rd, Address dest) { assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); code_section()->relocate(pc(), dest.rspec()); - movptr(Rd, dest.target()); + movptr(Rd, dest.target(), NOT_COMPRESSIBLE); } void MacroAssembler::mv(Register Rd, address addr) { @@ -2483,10 +2532,10 @@ void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { // the code cache cannot exceed 2Gb. la_patchable(tmp, entry, offset); if (cbuf != NULL) { cbuf->set_insts_mark(); } - jalr(x0, tmp, offset); + jalr_nc(x0, tmp, offset); } else { if (cbuf != NULL) { cbuf->set_insts_mark(); } - j(entry); + j_nc(entry); } } @@ -2500,10 +2549,10 @@ void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { // the code cache cannot exceed 2Gb. 
la_patchable(tmp, entry, offset); if (cbuf != NULL) { cbuf->set_insts_mark(); } - jalr(x1, tmp, offset); // link + jalr_nc(x1, tmp, offset); // link } else { if (cbuf != NULL) { cbuf->set_insts_mark(); } - jal(entry); // link + jal_nc(entry); // link } } @@ -2762,10 +2811,31 @@ void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &o auipc(reg1, (int32_t)distance + 0x800); offset = ((int32_t)distance << 20) >> 20; } else { - movptr_with_offset(reg1, dest.target(), offset); + movptr_with_offset(reg1, dest.target(), offset, NOT_COMPRESSIBLE); } } +void MacroAssembler::ld_patchable(Register Rd, const Address &dest, Register tmp) { + int offset = 0; + la_patchable(tmp, dest, offset); + // C-Ext: use uncompressed instructions to match pd_patch_instruction_size() + ld_nc(Rd, tmp, offset); +} + +void MacroAssembler::addi_patchable(Register Rd, const Address &dest, Register tmp) { + int offset = 0; + la_patchable(tmp, dest, offset); + // C-Ext: use uncompressed instructions to match pd_patch_instruction_size() + addi_nc(Rd, tmp, offset); +} + +void MacroAssembler::jalr_patchable(Register Rd, const Address &dest, Register tmp) { + int offset = 0; + la_patchable(tmp, dest, offset); + // C-Ext: use uncompressed instructions to match pd_patch_instruction_size() + jalr_nc(x1, tmp, offset); +} + void MacroAssembler::build_frame(int framesize) { assert(framesize >= 2, "framesize must include space for FP/RA"); assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); @@ -2784,7 +2854,7 @@ void MacroAssembler::remove_frame(int framesize) { add(sp, sp, framesize); } -void MacroAssembler::reserved_stack_check() { +void MacroAssembler::reserved_stack_check(bool compressible) { // testing if reserved zone needs to be enabled Label no_reserved_zone_enabling; @@ -2793,18 +2863,14 @@ void MacroAssembler::reserved_stack_check() { enter(); // RA and FP are live. mv(c_rarg0, xthread); - int32_t offset = 0; - la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); - jalr(x1, t0, offset); + jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), t0); leave(); // We have already removed our own frame. // throw_delayed_StackOverflowError will think that it's been // called by our caller. 
- offset = 0; - la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); - jalr(x0, t0, offset); - should_not_reach_here(); + jalr_patchable(x0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), t0); + should_not_reach_here(compressible); bind(no_reserved_zone_enabling); } @@ -2892,9 +2958,9 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { if (cbuf != NULL) { cbuf->set_insts_mark(); } relocate(entry.rspec()); if (!far_branches()) { - jal(entry.target()); + jal_nc(entry.target()); } else { - jal(pc()); + jal_nc(pc()); } // just need to return a non-null address postcond(pc() != badAddress); @@ -2903,7 +2969,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { address MacroAssembler::ic_call(address entry, jint method_index) { RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); - movptr(t1, (address)Universe::non_oop_word()); + movptr(t1, (address)Universe::non_oop_word(), NOT_COMPRESSIBLE); assert_cond(entry != NULL); return trampoline_call(Address(entry, rh)); } @@ -2933,7 +2999,9 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // make sure 4 byte aligned here, so that the destination address would be // 8 byte aligned after 3 intructions - while (offset() % wordSize == 0) { nop(); } + // C-Ext: when we reach here we may get a 2-byte alignment and + // nop() will be 2 bytes in length. + while (offset() % wordSize != 4) { nop(); } relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset)); @@ -2943,11 +3011,12 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, // - load the call // - call Label target; - ld(t0, target); // auipc + ld - jr(t0); // jalr + ld_nc(t0, target); // auipc + ld + jr_nc(t0); // jalr bind(target); assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, "should be"); + assert(offset() % wordSize == 0, "address loaded by ld must be 8-byte aligned under riscv64"); emit_int64((intptr_t)dest); const address stub_start_addr = addr_at(stub_start_offset); @@ -2989,8 +3058,7 @@ void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { assert_different_registers(src1, t0); int32_t offset; - la_patchable(t0, src2, offset); - ld(t0, Address(t0, offset)); + ld_patchable(t0, src2, t0); beq(src1, t0, equal); } diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 1913638e8f6..4907445f595 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -136,7 +136,7 @@ class MacroAssembler: public Assembler { void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); // last Java Frame (fills frame anchor) - void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp); + void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp, bool compressible = true); void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register temp); void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc,Register temp); @@ -357,13 +357,13 @@ class MacroAssembler: public Assembler { } // prints msg, dumps registers and stops 
execution
-  void stop(const char* msg);
+  void stop(const char* msg, bool compressible = true);
 
   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 
   void unimplemented(const char* what = "");
 
-  void should_not_reach_here() { stop("should not reach here"); }
+  void should_not_reach_here(bool compressible = true) { stop("should not reach here", compressible); }
 
   static address target_addr_for_insn(address insn_addr);
 
@@ -398,6 +398,7 @@ class MacroAssembler: public Assembler {
  public:
   // Standard pseudoinstruction
   void nop();
+  void nop_nc();
   void mv(Register Rd, Register Rs) ;
   void notr(Register Rd, Register Rs);
   void neg(Register Rd, Register Rs);
@@ -444,13 +445,13 @@ class MacroAssembler: public Assembler {
   void fsflagsi(unsigned imm);
 
   void beqz(Register Rs, const address &dest);
+  void bnez(Register Rs, const address &dest);
   void blez(Register Rs, const address &dest);
   void bgez(Register Rs, const address &dest);
   void bltz(Register Rs, const address &dest);
   void bgtz(Register Rs, const address &dest);
-  void bnez(Register Rs, const address &dest);
   void la(Register Rd, Label &label);
-  void la(Register Rd, const address &dest);
+  void la(Register Rd, const address &dest, bool compressible = true);
   void la(Register Rd, const Address &adr);
   //label
   void beqz(Register Rs, Label &l, bool is_far = false);
@@ -472,6 +473,12 @@ class MacroAssembler: public Assembler {
   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 
+  // C-Ext: incompressible version
+  void beqz_nc(Register Rs, const address &dest);
+  void bnez_nc(Register Rs, const address &dest);
+  void beqz_nc(Register Rs, Label &l, bool is_far = false);
+  void bnez_nc(Register Rs, Label &l, bool is_far = false);
+
   void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
   void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
   void push_reg(Register Rs);
@@ -612,6 +619,12 @@ class MacroAssembler: public Assembler {
 
   void la_patchable(Register reg1, const Address &dest, int32_t &offset);
 
+  // Note: to prevent misuse, programmers should call these helpers
+  // instead of hand-writing the equivalent la_patchable() sequences.
+ void ld_patchable(Register Rd, const Address &dest, Register tmp); + void addi_patchable(Register Rd, const Address &dest, Register tmp); + void jalr_patchable(Register Rd, const Address &dest, Register tmp); + virtual void _call_Unimplemented(address call_site) { mv(t1, call_site); } @@ -633,7 +646,7 @@ class MacroAssembler: public Assembler { void build_frame(int framesize); void remove_frame(int framesize); - void reserved_stack_check(); + void reserved_stack_check(bool compressible = true); void get_polling_page(Register dest, relocInfo::relocType rtype); address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); @@ -788,11 +801,9 @@ class MacroAssembler: public Assembler { void ld_constant(Register dest, const Address &const_addr) { if (NearCpool) { - ld(dest, const_addr); + ld_nc(dest, const_addr); } else { - int32_t offset = 0; - la_patchable(dest, InternalAddress(const_addr.target()), offset); - ld(dest, Address(dest, offset)); + ld_patchable(dest, InternalAddress(const_addr.target()), dest); } } diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp index 977e0c74445..ebf31b1aeb2 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp @@ -39,64 +39,145 @@ #include "c1/c1_Runtime1.hpp" #endif +uint32_t NativeInstruction::extract_rs1(address instr, int &size) { + assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + size = compressed_instruction_size; + uint16_t op = Assembler::extract_c(((uint16_t*)instr)[0], 1, 0); + switch (op) { + case 0b00: { + return Assembler::extract_c(((uint16_t*)instr)[0], 9, 7); + } + case 0b01: { + if (!is_set_nth_bit(((uint16_t*)instr)[0], 15)) { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } else { + return Assembler::extract_c(((uint16_t*)instr)[0], 9, 7); + } + } + case 0b10: { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } + default: + ShouldNotReachHere(); + } + return 0; + } else { + size = instruction_size; + return Assembler::extract(((unsigned*)instr)[0], 19, 15); + } +} + +uint32_t NativeInstruction::extract_rs2(address instr, int &size) { + assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + size = compressed_instruction_size; + uint16_t op = Assembler::extract_c(((uint16_t*)instr)[0], 1, 0); + switch (op) { + case 0b00: { + return Assembler::extract_c(((uint16_t*)instr)[0], 4, 2); + } + case 0b01: { + if (!is_set_nth_bit(((uint16_t*)instr)[0], 15)) { + ShouldNotReachHere(); + return 0; + } else { + return Assembler::extract_c(((uint16_t*)instr)[0], 4, 2); + } + } + case 0b10: { + return Assembler::extract_c(((uint16_t*)instr)[0], 6, 2); + } + default: + ShouldNotReachHere(); + } + return 0; + } else { + size = instruction_size; + return Assembler::extract(((unsigned*)instr)[0], 24, 20); + } +} + +uint32_t NativeInstruction::extract_rd(address instr, int &size) { + assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + size = compressed_instruction_size; + uint16_t op = Assembler::extract_c(((uint16_t*)instr)[0], 1, 0); + switch (op) { + case 0b00: { + return Assembler::extract_c(((uint16_t*)instr)[0], 4, 2); + } + case 0b01: { + if (!is_set_nth_bit(((uint16_t*)instr)[0], 15)) { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } else { + return Assembler::extract_c(((uint16_t*)instr)[0], 9, 7); + } + } + case 0b10: { + return Assembler::extract_c(((uint16_t*)instr)[0], 11, 7); + } + default: + ShouldNotReachHere(); + } + return 0; + } 
else { + size = instruction_size; + return Assembler::extract(((unsigned*)instr)[0], 11, 7); + } +} + bool NativeInstruction::is_pc_relative_at(address instr) { // auipc + jalr // auipc + addi // auipc + load // auipc + fload_load - if ((is_auipc_at(instr)) && - (is_addi_at(instr + 4) || is_jalr_at(instr + 4) || is_load_at(instr + 4) || is_float_load_at(instr + 4)) && - check_pc_relative_data_dependency(instr)) { - return true; - } - return false; + return (is_auipc_at(instr)) && + (is_addi_at(instr + instruction_size) || + is_jalr_at(instr + instruction_size) || + is_load_at(instr + instruction_size) || + is_float_load_at(instr + instruction_size)) && + check_pc_relative_data_dependency(instr); } // ie:ld(Rd, Label) bool NativeInstruction::is_load_pc_relative_at(address instr) { - if (is_auipc_at(instr) && // auipc - is_ld_at(instr + 4) && // ld - check_load_pc_relative_data_dependency(instr)) { - return true; - } - return false; + return is_auipc_at(instr) && // auipc + is_ld_at(instr + instruction_size) && // ld + check_load_pc_relative_data_dependency(instr); } bool NativeInstruction::is_movptr_at(address instr) { - if (is_lui_at(instr) && // Lui - is_addi_at(instr + 4) && // Addi - is_slli_shift_at(instr + 8, 11) && // Slli Rd, Rs, 11 - is_addi_at(instr + 12) && // Addi - is_slli_shift_at(instr + 16, 5) && // Slli Rd, Rs, 5 - (is_addi_at(instr + 20) || is_jalr_at(instr + 20) || is_load_at(instr + 20)) && // Addi/Jalr/Load - check_movptr_data_dependency(instr)) { - return true; - } - return false; + address pos = instr; + int size = 0; + return is_lui_at(pos) && // Lui + is_addi_at(pos += instruction_size) && // Addi + is_slli_shift_at(pos += instruction_size, 11, size) && // Slli Rd, Rs, 11 + is_addi_at(pos += size) && // Addi + is_slli_shift_at(pos += instruction_size, 5, size) && // Slli Rd, Rs, 5 + (is_addi_at(pos += size) || is_jalr_at(pos) || is_load_at(pos)) && // Addi/Jalr/Load + check_movptr_data_dependency(instr); } bool NativeInstruction::is_li32_at(address instr) { - if (is_lui_at(instr) && // lui - is_addiw_at(instr + 4) && // addiw - check_li32_data_dependency(instr)) { - return true; - } - return false; + address pos = instr; + return is_lui_at(pos) && // lui + is_addiw_at(pos += instruction_size) && // addiw + check_li32_data_dependency(instr); } bool NativeInstruction::is_li64_at(address instr) { - if (is_lui_at(instr) && // lui - is_addi_at(instr + 4) && // addi - is_slli_shift_at(instr + 8, 12)&& // Slli Rd, Rs, 12 - is_addi_at(instr + 12) && // addi - is_slli_shift_at(instr + 16, 12) && // Slli Rd, Rs, 12 - is_addi_at(instr + 20) && // addi - is_slli_shift_at(instr + 24, 8) && // Slli Rd, Rs, 8 - is_addi_at(instr + 28) && // addi - check_li64_data_dependency(instr)) { - return true; - } - return false; + address pos = instr; + int size = 0; + return is_lui_at(pos) && // lui + is_addi_at(pos += instruction_size) && // addi + is_slli_shift_at(pos += instruction_size, 12, size) && // Slli Rd, Rs, 12 + is_addi_at(pos += size) && // addi + is_slli_shift_at(pos += instruction_size, 12, size) && // Slli Rd, Rs, 12 + is_addi_at(pos += size) && // addi + is_slli_shift_at(pos += instruction_size, 8, size) && // Slli Rd, Rs, 8 + is_addi_at(pos += size) && // addi + check_li64_data_dependency(instr); } void NativeCall::verify() { @@ -203,7 +284,7 @@ void NativeMovConstReg::set_data(intptr_t x) { } else { // Store x into the instruction stream. 
MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); - ICache::invalidate_range(instruction_address(), movptr_instruction_size); + ICache::invalidate_range(instruction_address(), get_movptr_instruction_size()); } // Find and replace the oop/metadata corresponding to this @@ -341,7 +422,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); - assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || + assert(nativeInstruction_at(verified_entry)->is_jump_or_nop_nc() || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), "riscv64 cannot replace non-jump with jump"); @@ -371,14 +452,14 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; - CodeBuffer cb(code_pos, instruction_size); + CodeBuffer cb(code_pos, get_instruction_size()); MacroAssembler a(&cb); int32_t offset = 0; - a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli - a.jalr(x0, t0, offset); // jalr + a.movptr_with_offset(t0, entry, offset, NOT_COMPRESSIBLE); // lui, addi, slli, addi, slli + a.jalr_nc(x0, t0, offset); // jalr - ICache::invalidate_range(code_pos, instruction_size); + ICache::invalidate_range(code_pos, get_instruction_size()); } // MT-safe patching of a long jump instruction. diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index 117d58e8e28..dd06afc813e 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -53,7 +53,8 @@ class NativeInstruction { friend bool is_NativeCallTrampolineStub_at(address); public: enum { - instruction_size = 4 + instruction_size = 4, + compressed_instruction_size = 2, }; juint encoding() const { @@ -65,26 +66,49 @@ class NativeInstruction { bool is_call() const { return is_call_at(addr_at(0)); } bool is_jump() const { return is_jump_at(addr_at(0)); } + static bool is_compressed_instr(address instr) { + if ((((unsigned*)instr)[0] & 0b11) == 0b11) { + return false; + } + assert((((uint16_t *)instr)[0] & 0b11) != 0b11, "seems instr is not an illegal instruction beginning: 0x%x", ((unsigned*)instr)[0]); + return true; + } + static int instr_size(address instr) { + return is_compressed_instr(instr) ? 
compressed_instruction_size : instruction_size; + } static bool is_jal_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1101111; } static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100111 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } static bool is_branch_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b1100011; } static bool is_ld_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b011); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b011); } static bool is_load_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000011; } static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0000111; } static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010111; } static bool is_jump_at(address instr) { assert_cond(instr != NULL); return (is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr)); } static bool is_addi_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0011011 && - Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } + Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b000); } static bool is_lui_at(address instr) { assert_cond(instr != NULL); return Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0110111; } - static bool is_slli_shift_at(address instr, uint32_t shift) { + static bool is_slli_shift_at(address instr, uint32_t shift) { int size = 0; return is_slli_shift_at(instr, shift, size); } + static uint16_t extract_slli_c(address instr) { + uint16_t low5 = Assembler::extract_c(((uint16_t*)instr)[0], 6, 2); + uint16_t high1 = Assembler::extract_c(((uint16_t*)instr)[0], 12, 12); + return (high1 << 5 | low5); + } + static bool is_slli_shift_at(address instr, uint32_t shift, int &size) { assert_cond(instr != NULL); + if (is_compressed_instr(instr)) { + return Assembler::extract_c(((uint16_t*)instr)[0], 15, 13) == 0b000 && + Assembler::extract_c(((uint16_t*)instr)[0], 1, 0) == 0b10 && + extract_slli_c(instr) == shift && + (size = compressed_instruction_size); + } return (Assembler::extract(((unsigned*)instr)[0], 6, 0) == 0b0010011 && // opcode field Assembler::extract(((unsigned*)instr)[0], 14, 12) == 0b001 && // funct3 field, select the type of operation - Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift) && // shamt field + (size = instruction_size); } // return true if the (index1~index2) field of instr1 is equal to (index3~index4) field of instr2, otherwise false @@ -93,6 +117,13 @@ class NativeInstruction { return Assembler::extract(((unsigned*)instr1)[0], index1, index2) == Assembler::extract(((unsigned*)instr2)[0], index3, 
index4); } + static uint32_t extract_rs1(address instr) { int size = 0; return extract_rs1(instr, size); } + static uint32_t extract_rs2(address instr) { int size = 0; return extract_rs2(instr, size); } + static uint32_t extract_rd(address instr) { int size = 0; return extract_rd(instr, size); } + static uint32_t extract_rs1(address instr, int &size); + static uint32_t extract_rs2(address instr, int &size); + static uint32_t extract_rd(address instr, int &size); + // the instruction sequence of movptr is as below: // lui // addi @@ -101,15 +132,21 @@ class NativeInstruction { // slli // addi/jalr/load static bool check_movptr_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7); // check the rs1 field of addi/jalr/load and the rd field of slli + address lui = instr; + address addi1 = lui + instruction_size; + address slli1 = addi1 + instruction_size; + address addi2 = slli1 + instr_size(slli1); + address slli2 = addi2 + instruction_size; + address final = slli2 + instr_size(slli2); + return extract_rs1(addi1) == extract_rd(lui) && + extract_rs1(addi1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(addi1) && + extract_rs1(slli1) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(slli1) && + extract_rs1(addi2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(addi2) && + extract_rs1(slli2) == extract_rd(slli2) && + extract_rs1(final) == extract_rd(slli2); } // the instruction sequence of li64 is as below: @@ -121,44 +158,61 @@ class NativeInstruction { // addi // slli // addi - static bool check_li64_data_dependency(address instr) { - return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addi and the rd field of lui - compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 4, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 8, 19, 15, instr + 8, 11, 7) && // check the rs1 field and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 8, 11, 7) && // check the rs1 field of addi and the rd field of slli - compare_instr_field(instr + 12, 19, 15, instr + 12, 11, 7) && // check the rs1 field and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 12, 11, 7) && // check the rs1 field of slli and the rd field of addi - compare_instr_field(instr + 16, 19, 15, instr + 16, 11, 7) && // check the rs1 field and the rd field fof slli - compare_instr_field(instr + 20, 19, 15, instr + 16, 11, 7) && // check the rs1 
field of addi and the rd field of slli
- compare_instr_field(instr + 20, 19, 15, instr + 20, 11, 7) && // check the rs1 field and the rd field of addi
- compare_instr_field(instr + 24, 19, 15, instr + 20, 11, 7) && // check the rs1 field of slli and the rd field of addi
- compare_instr_field(instr + 24, 19, 15, instr + 24, 11, 7) && // check the rs1 field and the rd field of slli
- compare_instr_field(instr + 28, 19, 15, instr + 24, 11, 7) && // check the rs1 field of addi and the rd field of slli
- compare_instr_field(instr + 28, 19, 15, instr + 28, 11, 7); // check the rs1 field and the rd field of addi
+ static bool check_li64_data_dependency(address instr) { // FIXME: maybe restore the original code, since only the 'slli' instructions can be compressed here.
+ address lui = instr;
+ address addi1 = lui + instruction_size;
+ address slli1 = addi1 + instruction_size;
+ address addi2 = slli1 + instr_size(slli1);
+ address slli2 = addi2 + instruction_size;
+ address addi3 = slli2 + instr_size(slli2);
+ address slli3 = addi3 + instruction_size;
+ address addi4 = slli3 + instr_size(slli3);
+ return extract_rs1(addi1) == extract_rd(lui) &&
+ extract_rs1(addi1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(addi1) &&
+ extract_rs1(slli1) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(slli1) &&
+ extract_rs1(addi2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(addi2) &&
+ extract_rs1(slli2) == extract_rd(slli2) &&
+ extract_rs1(addi3) == extract_rd(slli2) &&
+ extract_rs1(addi3) == extract_rd(addi3) &&
+ extract_rs1(slli3) == extract_rd(addi3) &&
+ extract_rs1(slli3) == extract_rd(slli3) &&
+ extract_rs1(addi4) == extract_rd(slli3) &&
+ extract_rs1(addi4) == extract_rd(addi4);
}
// the instruction sequence of li32 is as below:
// lui
// addiw
static bool check_li32_data_dependency(address instr) {
- return compare_instr_field(instr + 4, 19, 15, instr, 11, 7) && // check the rs1 field of addiw and the rd field of lui
- compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field and the rd field of addiw
+ address lui = instr;
+ address addiw = lui + instruction_size;
+
+ return extract_rs1(addiw) == extract_rd(lui) &&
+ extract_rs1(addiw) == extract_rd(addiw);
}
// the instruction sequence of pc-relative is as below:
// auipc
// jalr/addi/load/float_load
static bool check_pc_relative_data_dependency(address instr) {
- return compare_instr_field(instr, 11, 7, instr + 4, 19, 15); // check the rd field of auipc and the rs1 field of jalr/addi/load/float_load
+ address auipc = instr;
+ address final = auipc + instruction_size;
+
+ return extract_rs1(final) == extract_rd(auipc);
}
// the instruction sequence of load_label is as below:
// auipc
// load
static bool check_load_pc_relative_data_dependency(address instr) {
- return compare_instr_field(instr, 11, 7, instr + 4, 11, 7) && // check the rd field of auipc and the rd field of load
- compare_instr_field(instr + 4, 19, 15, instr + 4, 11, 7); // check the rs1 field of load and the rd field of load
+ address auipc = instr;
+ address load = auipc + instruction_size;
+
+ return extract_rd(load) == extract_rd(auipc) &&
+ extract_rs1(load) == extract_rd(load);
}
static bool is_movptr_at(address instr);
@@ -168,6 +222,7 @@ class NativeInstruction {
static bool is_load_pc_relative_at(address branch);
static bool is_call_at(address instr) {
+ assert(!is_compressed_instr(instr), "we need to reserve the 4-byte instruction to handle all cases");
if (is_jal_at(instr) || is_jalr_at(instr)) {
return true;
}
@@ -176,9 +231,11
@@ class NativeInstruction { static bool is_lwu_to_zr(address instr); inline bool is_nop(); + inline bool is_compressed_nop(); + inline bool is_uncompressed_nop(); inline bool is_illegal(); inline bool is_return(); - inline bool is_jump_or_nop(); + inline bool is_jump_or_nop_nc(); inline bool is_cond_jump(); bool is_safepoint_poll(); bool is_sigill_zombie_not_entrant(); @@ -189,6 +246,7 @@ class NativeInstruction { jint int_at(int offset) const { return *(jint*) addr_at(offset); } juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + jushort uint16_at(int offset) const { return *(jushort *) addr_at(offset); } address ptr_at(int offset) const { return *(address*) addr_at(offset); } @@ -318,13 +376,23 @@ inline NativeCall* nativeCall_before(address return_address) { class NativeMovConstReg: public NativeInstruction { public: enum RISCV64_specific_constants { - movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). + compressed_movptr_instruction_size = 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size, // lui, addi, slli(C), addi, slli(C), addi. See movptr(). movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). + compressed_movptr_with_offset_instruction_size = 3 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size, // lui, addi, slli(C), addi, slli(C). See movptr_with_offset(). load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld instruction_offset = 0, displacement_offset = 0 }; + static const int get_movptr_with_offset_instruction_size() { + return !UseRVC ? movptr_with_offset_instruction_size : compressed_movptr_with_offset_instruction_size; + } + + static const int get_movptr_instruction_size() { + return !UseRVC ? 
movptr_instruction_size : compressed_movptr_instruction_size; + } + address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { // if the instruction at 5 * instruction_size is addi, @@ -333,12 +401,12 @@ class NativeMovConstReg: public NativeInstruction { // However, when the instruction at 5 * instruction_size isn't addi, // the next instruction address should be addr_at(5 * instruction_size) if (nativeInstruction_at(instruction_address())->is_movptr()) { - if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { + if (is_addi_at(addr_at(get_movptr_with_offset_instruction_size()))) { // Assume: lui, addi, slli, addi, slli, addi - return addr_at(movptr_instruction_size); + return addr_at(get_movptr_instruction_size()); } else { // Assume: lui, addi, slli, addi, slli - return addr_at(movptr_with_offset_instruction_size); + return addr_at(get_movptr_with_offset_instruction_size()); } } else if (is_load_pc_relative_at(instruction_address())) { // Assume: auipc, ld @@ -353,7 +421,7 @@ class NativeMovConstReg: public NativeInstruction { void flush() { if (!maybe_cpool_ref(instruction_address())) { - ICache::invalidate_range(instruction_address(), movptr_instruction_size); + ICache::invalidate_range(instruction_address(), get_movptr_instruction_size()); } } @@ -422,10 +490,10 @@ inline NativeMovRegMem* nativeMovRegMem_at (address addr) { class NativeJump: public NativeInstruction { public: enum RISCV64_specific_constants { - instruction_size = 4, + instruction_size = NativeInstruction::instruction_size, instruction_offset = 0, data_offset = 0, - next_instruction_offset = 4 + next_instruction_offset = NativeInstruction::instruction_size }; address instruction_address() const { return addr_at(instruction_offset); } @@ -456,12 +524,18 @@ inline NativeJump* nativeJump_at(address addr) { class NativeGeneralJump: public NativeJump { public: enum RISCV64_specific_constants { - instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + compressed_instruction_size = 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size, // lui, addi, slli(C), addi, slli(C), jalr instruction_offset = 0, data_offset = 0, - next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr + normal_next_instruction_offset = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr + compressed_next_instruction_offset = 4 * NativeInstruction::instruction_size + 2 * NativeInstruction::compressed_instruction_size // lui, addi, slli(C), addi, slli(C), jalr }; + static const int get_instruction_size() { + return !UseRVC ? 
instruction_size : compressed_instruction_size;
+ }
+
address jump_destination() const;
static void insert_unconditional(address code_pos, address entry);
@@ -481,13 +555,27 @@ class NativeIllegalInstruction: public NativeInstruction {
static void insert(address code_pos);
};
-inline bool NativeInstruction::is_nop() {
- uint32_t insn = *(uint32_t*)addr_at(0);
+inline bool NativeInstruction::is_nop() {
+ return is_compressed_nop() || is_uncompressed_nop();
+}
+
+inline bool NativeInstruction::is_compressed_nop() {
+ address instr_addr = addr_at(0);
+ if (is_compressed_instr(instr_addr)) {
+ uint16_t insn = *(uint16_t*)instr_addr;
+ return insn == 0x1;
+ }
+ return false;
+}
+
+inline bool NativeInstruction::is_uncompressed_nop() {
+ address instr_addr = addr_at(0);
+ uint32_t insn = *(uint32_t*)instr_addr;
return insn == 0x13;
}
-inline bool NativeInstruction::is_jump_or_nop() {
- return is_nop() || is_jump();
+inline bool NativeInstruction::is_jump_or_nop_nc() {
+ return is_uncompressed_nop() || is_jump();
}
// Call trampoline stubs.
diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp
index e3203a5f032..99c65b13b8a 100644
--- a/src/hotspot/cpu/riscv/register_riscv.hpp
+++ b/src/hotspot/cpu/riscv/register_riscv.hpp
@@ -58,7 +58,11 @@ class RegisterImpl: public AbstractRegisterImpl {
enum {
number_of_registers = 32,
number_of_byte_registers = 32,
- max_slots_per_register = 2
+ max_slots_per_register = 2,
+
+ // C-Ext: integer registers in the range [x8, x15] are the ones addressable by the 3-bit register fields of RVC instructions. Please see Table 16.2 in the spec.
+ compressed_register_base = 8,
+ compressed_register_top = 15,
};
// derived registers, offsets, and addresses
@@ -71,10 +75,13 @@ class RegisterImpl: public AbstractRegisterImpl {
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int compressed_encoding() const { assert(is_compressed_valid(), "invalid compressed register"); return ((intptr_t)this - compressed_register_base); }
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+ bool is_compressed_valid() const { return compressed_register_base <= (intptr_t)this && (intptr_t)this <= compressed_register_top; }
bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; }
const char* name() const;
int encoding_nocheck() const { return (intptr_t)this; }
+ int compressed_encoding_nocheck() const { return ((intptr_t)this - compressed_register_base); }
// Return the bit which represents this register. This is intended
// to be ORed into a bitmask: for usage see class RegSet below.
@@ -131,7 +138,11 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
- max_slots_per_register = 2
+ max_slots_per_register = 2,
+
+ // C-Ext: float registers in the range [f8, f15] are the ones addressable by the 3-bit register fields of RVC instructions. Please see Table 16.2 in the spec.
+ compressed_register_base = 8,
+ compressed_register_top = 15,
};
// construction
@@ -144,8 +155,11 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+ int compressed_encoding() const { assert(is_compressed_valid(), "invalid compressed register"); return ((intptr_t)this - compressed_register_base); }
int encoding_nocheck() const { return (intptr_t)this; }
+ int compressed_encoding_nocheck() const { return ((intptr_t)this - compressed_register_base); }
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
+ bool is_compressed_valid() const { return compressed_register_base <= (intptr_t)this && (intptr_t)this <= compressed_register_top; }
const char* name() const;
};
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index 79cba765ea1..c2f8a955972 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -1156,14 +1156,15 @@ bool needs_acquiring_load_reserved(const Node *n)
int MachCallStaticJavaNode::ret_addr_offset() {
- // call should be a simple jal
- int off = 4;
- return off;
+ // jal
+ return 1 * NativeInstruction::instruction_size;
}
int MachCallDynamicJavaNode::ret_addr_offset() {
- return 28; // movptr, jal
+ return 4 * NativeInstruction::instruction_size +
+ 2 * (!UseRVC ? NativeInstruction::instruction_size : NativeInstruction::compressed_instruction_size) +
+ 1 * NativeInstruction::instruction_size; // movptr, jal
}
int MachCallRuntimeNode::ret_addr_offset() {
@@ -1171,19 +1172,26 @@ int MachCallRuntimeNode::ret_addr_offset() {
// jal(addr)
// or with far branches
// jal(trampoline_stub)
- // for real runtime callouts it will be six instructions
+ // for real runtime callouts it will be 12 instructions
// see riscv64_enc_java_to_runtime
- // la(t1, retaddr)
- // la(t0, RuntimeAddress(addr))
- // addi(sp, sp, -2 * wordSize)
- // sd(zr, Address(sp))
- // sd(t1, Address(sp, wordSize))
- // jalr(t0)
+ // la(t1, retaddr) -> auipc + addi
+ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli(C) + addi + slli(C) + addi
+ // addi(sp, sp, -2 * wordSize) -> addi(C)
+ // sd(zr, Address(sp)) -> sd(C)
+ // sd(t1, Address(sp, wordSize)) -> sd(C)
+ // jalr(t0) -> jalr(C)
CodeBlob *cb = CodeCache::find_blob(_entry_point);
if (cb != NULL) {
return 1 * NativeInstruction::instruction_size;
} else {
- return 12 * NativeInstruction::instruction_size;
+ const int instruction_size = NativeInstruction::instruction_size;
+ const int compressed_instruction_size = (!UseRVC ? instruction_size : NativeInstruction::compressed_instruction_size);
+ return 2 * instruction_size +
+ 4 * instruction_size + 2 * compressed_instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * compressed_instruction_size;
}
}
@@ -1192,6 +1200,41 @@ int MachCallNativeNode::ret_addr_offset() {
return -1;
}
+// C-Ext: with the C extension a call site may be only 2-byte aligned.
+// The jal encodes its offset in bits [12, 31], so a misaligned jal could span a cache line.
+// Patching an instruction that spans a cache line is not a single atomic write,
+// while other threads may be running the same piece of code at full speed, causing concurrency issues.
+// So we must ensure the jal does not span a cache line so that it can be patched safely.
+int CallStaticJavaDirectNode::compute_padding(int current_offset) const
+{
+ // Make sure the address of the jal is 4-byte aligned.
+ return align_up(current_offset, alignment_required()) - current_offset;
+}
+
+// C-Ext: with the C extension a call site may be only 2-byte aligned.
+// The jal encodes its offset in bits [12, 31], so a misaligned jal could span a cache line.
+// Patching an instruction that spans a cache line is not a single atomic write,
+// while other threads may be running the same piece of code at full speed, causing concurrency issues.
+// So we must ensure the jal does not span a cache line so that it can be patched safely.
+int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
+{
+ // skip the movptr emitted by MacroAssembler::ic_call():
+ // lui + addi + slli(C) + addi + slli(C) + addi
+ // Though the movptr() size is already 4-byte aligned with or without C-Ext,
+ // we guard against future changes by calculating the size explicitly.
+ const int instruction_size = NativeInstruction::instruction_size;
+ const int compressed_instruction_size = (!UseRVC ? instruction_size : NativeInstruction::compressed_instruction_size);
+ const int movptr_size =
+ 2 * instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * instruction_size +
+ 1 * compressed_instruction_size +
+ 1 * instruction_size;
+ current_offset += movptr_size;
+ // Make sure the address of the jal is 4-byte aligned.
+ return align_up(current_offset, alignment_required()) - current_offset;
+}
+
//=============================================================================
#ifndef PRODUCT
@@ -1226,7 +1269,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
}
uint MachNopNode::size(PhaseRegAlloc*) const {
- return _count * NativeInstruction::instruction_size;
+ return _count * (!UseRVC ? NativeInstruction::instruction_size : NativeInstruction::compressed_instruction_size);
}
//=============================================================================
@@ -1295,7 +1338,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// insert a nop at the start of the prolog so we can patch in a
// branch if we need to invalidate the method later
- __ nop();
+ __ nop_nc(); // 4 bytes
assert_cond(C != NULL);
@@ -1387,7 +1430,14 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
__ remove_frame(framesize);
if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
- __ reserved_stack_check();
+ // C-Ext: we need to emit instructions of a constant size here.
+ // This node emits should_not_reach_here(), which in turn emits a movptr of the current pc().
+ // However, C2 first runs PhaseOutput::scratch_emit_size() to simulate the node's size -
+ // at that point pc() is a different value from the final emission, so the sequence may get compressed differently.
+ // The node's size could then differ between the scratch emission and the real emission,
+ // which is not allowed. So we emit the same constant size by disabling compression
+ // of the movptr of pc().
+ __ reserved_stack_check(NOT_COMPRESSIBLE);
}
if (do_polling() && C->is_method_compilation()) {
@@ -1644,7 +1694,8 @@ void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
int reg = ra_->get_encode(this);
if (is_imm_in_range(offset, 12, 0)) {
- __ addi(as_Register(reg), sp, offset);
+ // C-Ext: See BoxLockNode::size(). We need to manually calculate this node's size.
+ __ addi_nc(as_Register(reg), sp, offset); } else if (is_imm_in_range(offset, 32, 0)) { __ li32(t0, offset); __ add(as_Register(reg), sp, t0); @@ -9792,6 +9843,7 @@ instruct CallStaticJavaDirect(method meth) riscv64_enc_call_epilog ); ins_pipe(pipe_class_call); + ins_alignment(4); %} // TO HERE @@ -9811,6 +9863,7 @@ instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) riscv64_enc_call_epilog ); ins_pipe(pipe_class_call); + ins_alignment(4); %} // Call Runtime Instruction diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index 506ff104603..dc9220652ad 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -348,9 +348,7 @@ static void patch_callers_callsite(MacroAssembler *masm) { __ mv(c_rarg0, xmethod); __ mv(c_rarg1, ra); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), t0); // Explicit fence.i required because fixup_callers_callsite may change the code // stream. @@ -1020,9 +1018,7 @@ static void rt_call(MacroAssembler* masm, address dest) { if (cb) { __ far_call(RuntimeAddress(dest)); } else { - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(dest), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(dest), t0); } } @@ -1147,7 +1143,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, int vep_offset = ((intptr_t)__ pc()) - start; // First instruction must be a nop as it may need to be patched on deoptimisation - __ nop(); + __ nop_nc(); gen_special_dispatch(masm, method, in_sig_bt, @@ -1298,7 +1294,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. 
- __ nop(); + __ nop_nc(); if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { Label L_skip_barrier; @@ -1799,9 +1795,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #ifndef PRODUCT assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), t0); // Restore any method result value restore_native_result(masm, ret_type, stack_slots); @@ -2018,9 +2012,7 @@ void SharedRuntime::generate_deopt_blob() { #endif // ASSERT __ mv(c_rarg0, xthread); __ mv(c_rarg1, xcpool); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), t0); __ bind(retaddr); // Need to have an oopmap that tells fetch_unroll_info where to @@ -2156,9 +2148,7 @@ void SharedRuntime::generate_deopt_blob() { __ mv(c_rarg0, xthread); __ mv(c_rarg1, xcpool); // second arg: exec_mode - offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), t0); // Set an oopmap for the call site // Use the same PC we used for the last java frame @@ -2242,11 +2232,9 @@ void SharedRuntime::generate_uncommon_trap_blob() { __ mv(c_rarg0, xthread); __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); - int32_t offset = 0; - __ la_patchable(t0, + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, - Deoptimization::uncommon_trap)), offset); - __ jalr(x1, t0, offset); + Deoptimization::uncommon_trap)), t0); __ bind(retaddr); // Set an oopmap for the call site @@ -2368,9 +2356,7 @@ void SharedRuntime::generate_uncommon_trap_blob() { // sp should already be aligned __ mv(c_rarg0, xthread); __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); - offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), t0); // Set an oopmap for the call site // Use the same PC we used for the last java frame @@ -2439,9 +2425,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t // Do the call __ mv(c_rarg0, xthread); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(call_ptr), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(call_ptr), t0); __ bind(retaddr); // Set an oopmap for the call site. 
This oopmap will map all @@ -2549,9 +2533,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha __ set_last_Java_frame(sp, noreg, retaddr, t0); __ mv(c_rarg0, xthread); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(destination), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(destination), t0); __ bind(retaddr); } @@ -2688,9 +2670,7 @@ void OptoRuntime::generate_exception_blob() { address the_pc = __ pc(); __ set_last_Java_frame(sp, noreg, the_pc, t0); __ mv(c_rarg0, xthread); - int32_t offset = 0; - __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); - __ jalr(x1, t0, offset); + __ jalr_patchable(x1, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), t0); // handle_exception_C is a special VM call which does not require an explicit diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp index affb0f8b11e..2650c4f2a59 100644 --- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp @@ -103,6 +103,12 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseMD5Intrinsics, false); } + // compressed instruction extension + if (UseRVC && !(_features & CPU_C)) { + warning("RVC is not supported on this CPU"); + FLAG_SET_DEFAULT(UseRVC, false); + } + if (UseRVV) { if (!(_features & CPU_V)) { warning("RVV is not supported on this CPU"); diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index 63d44d7a7b6..853b7daf420 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -431,7 +431,7 @@ class PatchingStub: public CodeStub { NativeMovRegMem* n_move = nativeMovRegMem_at(pc_start()); n_move->set_offset(field_offset); // Copy will never get executed, so only copy the part which is required for patching. - _bytes_to_copy = MAX2(n_move->num_bytes_to_end_of_patch(), (int)NativeGeneralJump::instruction_size); + _bytes_to_copy = MAX2(n_move->num_bytes_to_end_of_patch(), NOT_RISCV((int)NativeGeneralJump::instruction_size) RISCV_ONLY(NativeGeneralJump::get_instruction_size())); } else if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) { assert(_obj != noreg, "must have register object for load_klass/load_mirror"); #ifdef ASSERT diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index f8ddd9d8d1b..754fdd2bcc8 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -40,7 +40,7 @@ void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_cod // We must have enough patching space so that call can be inserted. // We cannot use fat nops here, since the concurrent code rewrite may transiently // create the illegal instruction sequence. 
- while ((intx) _masm->pc() - (intx) patch->pc_start() < NativeGeneralJump::instruction_size) { + while ((intx) _masm->pc() - (intx) patch->pc_start() < NOT_RISCV(NativeGeneralJump::instruction_size) RISCV_ONLY(NativeGeneralJump::get_instruction_size()) ) { _masm->nop(); } patch->install(_masm, patch_code, obj, info); diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp index 5b24e062b18..d3fa1578f95 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp @@ -329,7 +329,7 @@ JVMFlag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) { } int minimum_alignment = 16; -#if defined(X86) && !defined(AMD64) +#if (defined(X86) && !defined(AMD64)) || defined(RISCV) minimum_alignment = 4; #elif defined(S390) minimum_alignment = 2;
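
A note for reviewers on the encoding rule that NativeInstruction::is_compressed_instr() relies on throughout this patch: in RISC-V, every 32-bit instruction has 0b11 in its two least-significant opcode bits, while the 16-bit RVC encodings use 0b00, 0b01, or 0b10, so the first halfword alone determines the instruction length. A minimal standalone sketch of that rule (plain C++ with illustrative names, not HotSpot code):

    #include <cstdint>
    #include <cstdio>

    // Returns the byte length of a RISC-V instruction, judging only by its
    // first (lowest-addressed) halfword: 16-bit RVC encodings end in 00/01/10,
    // all 32-bit base encodings end in 11.
    static int rv_instr_length(uint16_t first_halfword) {
      return (first_halfword & 0b11) == 0b11 ? 4 : 2;
    }

    int main() {
      printf("%d\n", rv_instr_length(0x0001)); // c.nop                        -> 2
      printf("%d\n", rv_instr_length(0x0013)); // low halfword of addi x0,x0,0 -> 4
      return 0;
    }

The same constants explain why is_compressed_nop() compares the halfword against 0x1 (c.nop) while is_uncompressed_nop() compares the word against 0x13 (addi x0, x0, 0).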
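The compressed_encoding() accessors added to RegisterImpl and FloatRegisterImpl capture the RVC convention that the 3-bit register fields of the compressed formats can only name x8-x15 (or f8-f15), encoded as 0-7. A hedged sketch of that mapping (illustrative function, not the HotSpot accessor):

    #include <cassert>
    #include <cstdio>

    // Map a full 5-bit register number to the 3-bit field used by the
    // CIW/CL/CS/CA/CB compressed formats: x8..x15 (f8..f15) encode as 0..7.
    static int rvc_reg_encoding(int regno) {
      assert(regno >= 8 && regno <= 15 && "only x8..x15 / f8..f15 have a 3-bit RVC encoding");
      return regno - 8;
    }

    int main() {
      printf("x8  -> %d\n", rvc_reg_encoding(8));  // 0
      printf("x15 -> %d\n", rvc_reg_encoding(15)); // 7
      return 0;
    }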
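The adjusted size constants and ret_addr_offset() values are plain byte arithmetic over the emitted sequences. The sketch below (illustrative constants, assuming, as the patch comments state, that only the two slli's of a movptr are compressed) reproduces the numbers used above: 24 vs. 20 bytes for movptr, and 28 vs. 24 bytes for MachCallDynamicJavaNode::ret_addr_offset(), i.e. movptr plus jal:

    #include <cstdio>

    const int kInstr  = 4; // uncompressed instruction width in bytes
    const int kCInstr = 2; // compressed (RVC) instruction width in bytes

    // movptr emits: lui, addi, slli, addi, slli, addi.
    // With RVC enabled only the two slli's are emitted in compressed form.
    static int movptr_size(bool use_rvc) {
      return 4 * kInstr + 2 * (use_rvc ? kCInstr : kInstr);
    }

    int main() {
      printf("movptr, no RVC:          %d\n", movptr_size(false));          // 24
      printf("movptr, RVC:             %d\n", movptr_size(true));           // 20
      printf("ret_addr_offset, no RVC: %d\n", movptr_size(false) + kInstr); // 28
      printf("ret_addr_offset, RVC:    %d\n", movptr_size(true) + kInstr);  // 24
      return 0;
    }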
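Finally, both compute_padding() overrides reduce to rounding an offset up to a 4-byte boundary so the patchable jal cannot span a cache line; the dynamic-call variant first skips the movptr emitted by ic_call(). A small sketch of that arithmetic (illustrative align_up helper, mirroring the logic assumed above):

    #include <cstdio>

    // Round x up to the next multiple of the power-of-two alignment a.
    static int align_up(int x, int a) { return (x + a - 1) & -a; }

    // Padding inserted before a static call so its jal is 4-byte aligned.
    static int static_call_padding(int current_offset) {
      return align_up(current_offset, 4) - current_offset;
    }

    // Padding for a dynamic call: skip the movptr from ic_call() (20 bytes
    // with RVC, 24 without), then align the jal that follows it.
    static int dynamic_call_padding(int current_offset, bool use_rvc) {
      const int movptr = 4 * 4 + 2 * (use_rvc ? 2 : 4);
      current_offset += movptr;
      return align_up(current_offset, 4) - current_offset;
    }

    int main() {
      printf("%d\n", static_call_padding(6));        // 2 (6 -> 8)
      printf("%d\n", dynamic_call_padding(6, true)); // 2 (6 + 20 = 26 -> 28)
      return 0;
    }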