diff --git a/.github/labeler.yml b/.github/labeler.yml
index 38bc906e51..168ebfba62 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -133,6 +133,11 @@ SH:
   - include/capstone/sh.h
   - tests/details/sh.yaml
 
+Etca:
+  - arch/Etca/**
+  - cstool/cstool_etca.c
+  - include/capstone/etca.h
+
 Sparc:
   - arch/Sparc/**
   - cstool/cstool_sparc.c
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 718115043b..2aa258cda1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -106,8 +106,8 @@ if(APPLE AND NOT CAPSTONE_BUILD_MACOS_THIN)
     set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
 endif()
 
-set(SUPPORTED_ARCHITECTURES ARM AARCH64 M68K MIPS PPC SPARC SYSTEMZ XCORE X86 TMS320C64X M680X EVM MOS65XX WASM BPF RISCV SH TRICORE ALPHA HPPA LOONGARCH XTENSA ARC)
-set(SUPPORTED_ARCHITECTURE_LABELS ARM AARCH64 M68K MIPS PowerPC Sparc SystemZ XCore x86 TMS320C64x M680x EVM MOS65XX WASM BPF RISCV SH TriCore Alpha HPPA LoongArch Xtensa ARC)
+set(SUPPORTED_ARCHITECTURES ARM AARCH64 M68K MIPS PPC SPARC SYSTEMZ XCORE X86 TMS320C64X M680X EVM MOS65XX WASM BPF RISCV SH ETCA TRICORE ALPHA HPPA LOONGARCH XTENSA ARC)
+set(SUPPORTED_ARCHITECTURE_LABELS ARM AARCH64 M68K MIPS PowerPC Sparc SystemZ XCore x86 TMS320C64x M680x EVM MOS65XX WASM BPF RISCV SH Etca TriCore Alpha HPPA LoongArch Xtensa ARC)
 
 # If building for OSX it's best to allow CMake to handle building both architectures
 if(APPLE AND NOT CAPSTONE_BUILD_MACOS_THIN)
@@ -228,7 +228,7 @@ set(HEADERS_COMMON
     include/capstone/sh.h
     include/capstone/tricore.h
     include/capstone/platform.h
-    include/capstone/sh.h
+    include/capstone/etca.h
     include/capstone/alpha.h
     include/capstone/hppa.h
     include/capstone/loongarch.h
@@ -633,6 +633,20 @@ if(CAPSTONE_SH_SUPPORT)
     )
 endif()
 
+if(CAPSTONE_ETCA_SUPPORT) # upper-case key, matching the ETCA entry in SUPPORTED_ARCHITECTURES and the other CAPSTONE_<ARCH>_SUPPORT guards
+    add_definitions(-DCAPSTONE_HAS_ETCA)
+    set(SOURCES_ETCA
+        arch/Etca/EtcaDisassembler.c
+        arch/Etca/EtcaInstPrinter.c
+        arch/Etca/EtcaModule.c
+    )
+    set(HEADERS_ETCA
+        arch/Etca/EtcaDisassembler.h
+        arch/Etca/EtcaInstPrinter.h
+        arch/Etca/EtcaModule.h
+    )
+endif()
+ if (CAPSTONE_TRICORE_SUPPORT) add_definitions(-DCAPSTONE_HAS_TRICORE) set(SOURCES_TRICORE @@ -765,6 +779,7 @@ set(ALL_SOURCES ${SOURCES_BPF} ${SOURCES_RISCV} ${SOURCES_SH} + ${SOURCES_ETCA} ${SOURCES_TRICORE} ${SOURCES_ALPHA} ${SOURCES_HPPA} @@ -793,6 +808,7 @@ set(ALL_HEADERS ${HEADERS_BPF} ${HEADERS_RISCV} ${HEADERS_SH} + ${HEADERS_ETCA} ${HEADERS_TRICORE} ${HEADERS_ALPHA} ${HEADERS_HPPA} @@ -868,6 +884,7 @@ source_group("Source\\MOS65XX" FILES ${SOURCES_MOS65XX}) source_group("Source\\BPF" FILES ${SOURCES_BPF}) source_group("Source\\RISCV" FILES ${SOURCES_RISCV}) source_group("Source\\SH" FILES ${SOURCES_SH}) +source_group("Source\\Etca" FILES ${SOURCES_ETCA}) source_group("Source\\TriCore" FILES ${SOURCES_TRICORE}) source_group("Source\\Alpha" FILES ${SOURCES_ALPHA}) source_group("Source\\HPPA" FILES ${SOURCES_HPPA}) @@ -894,6 +911,7 @@ source_group("Include\\MOS65XX" FILES ${HEADERS_MOS65XX}) source_group("Include\\BPF" FILES ${HEADERS_BPF}) source_group("Include\\RISCV" FILES ${HEADERS_RISCV}) source_group("Include\\SH" FILES ${HEADERS_SH}) +source_group("Include\\Etca" FILES ${HEADERS_ETCA}) source_group("Include\\TriCore" FILES ${HEADERS_TRICORE}) source_group("Include\\Alpha" FILES ${HEADERS_ALPHA}) source_group("Include\\HPPA" FILES ${HEADERS_HPPA}) diff --git a/arch/Etca/EtcaDisassembler.c b/arch/Etca/EtcaDisassembler.c new file mode 100644 index 0000000000..1df9127404 --- /dev/null +++ b/arch/Etca/EtcaDisassembler.c @@ -0,0 +1,818 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#include "capstone/capstone.h" +#include "capstone/etca.h" +#include +#include +#include +#include +#include "../../cs_priv.h" +#include "../../MCInst.h" +#include "../../MathExtras.h" +#include "EtcaDisassembler.h" + +typedef struct { + struct { + bool present; + } pfx_cond; + + uint8_t cond : 4; + + struct { + bool present; + bool q : 1; + bool a : 1; + bool b : 1; + bool x : 1; + } pfx_rex; + + struct { + bool present; + uint8_t a : 3; + } single_reg; 
+ + struct { + bool present; + uint8_t a : 3; + uint8_t b : 3; + uint8_t m : 3; + } abm; + + struct { + bool present; + uint8_t r : 3; + uint64_t imm; + } ri; + + struct { + bool present; + uint64_t extended; + } rel; + + union { + struct { + uint8_t sib; + uint64_t extended_disp; + } mo1; + + struct { + uint8_t sib; + uint64_t extended_disp; + uint64_t imm; + } mo2; + } x; + + uint8_t ss : 2; + etca_insn insn; +} DecodeIsntCtx; + +static bool doesSignExtend(etca_insn insn) +{ + switch (insn) { + case ETCA_INS_INVALID: + case ETCA_INS_NOP: + case ETCA_INS_ENDING: + case ETCA_INS_SYSCALL: + case ETCA_INS_ERET: + case ETCA_INS_WAIT: + case ETCA_INS_REL_JMP: + case ETCA_INS_ABS_JMP: + case ETCA_INS_REL_CALL: + case ETCA_INS_ABS_CALL: + case ETCA_INS_LEA: + case ETCA_INS_CACHE_FLUSH_ALL: + case ETCA_INS_DATA_PREFETCH: + case ETCA_INS_INSTRUCTION_PREFETCH: + case ETCA_INS_DCACHE_FLUSH: + case ETCA_INS_ICACHE_INVALIDATE: + case ETCA_INS_CACHE_INVALIDATE_ALL: + case ETCA_INS_DCACHE_INVALIDATE: + case ETCA_INS_ALLOC_ZERO: + return false; /* not applicable */ + + case ETCA_INS_ADD: + case ETCA_INS_SUB: + case ETCA_INS_RSUB: + case ETCA_INS_CMP: + case ETCA_INS_OR: + case ETCA_INS_XOR: + case ETCA_INS_AND: + case ETCA_INS_TEST: + case ETCA_INS_MOVS: + return true; /* sign extend */ + + case ETCA_INS_MOVZ: + case ETCA_INS_LOAD: + case ETCA_INS_STORE: + case ETCA_INS_SLO: + case ETCA_INS_READCR: + case ETCA_INS_WRITECR: + return false; /* zero extend */ + + case ETCA_INS_PUSH: + case ETCA_INS_POP: + case ETCA_INS_ADC: + case ETCA_INS_SBB: + case ETCA_INS_RSBB: + case ETCA_INS_ASR: + case ETCA_INS_ROL: + case ETCA_INS_ROR: + case ETCA_INS_SHL: + case ETCA_INS_SHR: + case ETCA_INS_RCL: + case ETCA_INS_RCR: + case ETCA_INS_POPCNT: + case ETCA_INS_GREV: + case ETCA_INS_CTZ: + case ETCA_INS_CLZ: + case ETCA_INS_NOT: + case ETCA_INS_ANDN: + case ETCA_INS_UDIV: + case ETCA_INS_SDIV: + case ETCA_INS_UREM: + case ETCA_INS_SREM: + case ETCA_INS_UMUL: + case ETCA_INS_SMUL: + case 
ETCA_INS_UHMUL: + case ETCA_INS_SHMUL: + case ETCA_INS_LSB: + case ETCA_INS_LSBMSK: + case ETCA_INS_RLSB: + case ETCA_INS_ZHIB: + return true; /* TODO: this makes no sense */ + } +} + +static void parseABM(DecodeIsntCtx *ctx, uint8_t byte) +{ + ctx->abm.present = true; + ctx->abm.a = byte >> 5; + ctx->abm.b = (byte >> 3) & (7 /* 0b111 */); + ctx->abm.m = byte & 3; +} + +static void parseRI(DecodeIsntCtx *ctx, uint8_t byte, etca_insn insn) +{ + ctx->ri.present = true; + ctx->ri.r = byte >> 5; + ctx->ri.imm = byte & 31 /* 0b11111 */; + if (doesSignExtend(insn)) + ctx->ri.imm = SignExtend64(ctx->ri.imm, 5); +} + +static etca_insn parseExopOpcode(uint16_t opc) +{ + // clang-format off + switch (opc) + { + /* exop */ + case 0: return ETCA_INS_ADC; + case 1: return ETCA_INS_SBB; + case 2: return ETCA_INS_RSBB; + case 3: return ETCA_INS_ASR; + case 4: return ETCA_INS_ROL; + case 5: return ETCA_INS_ROR; + case 6: return ETCA_INS_SHL; + case 7: return ETCA_INS_SHR; + + /* bmi1 */ + case 8: return ETCA_INS_RCL; + case 9: return ETCA_INS_RCR; + case 10: return ETCA_INS_POPCNT; + case 11: return ETCA_INS_GREV; + case 12: return ETCA_INS_CTZ; + case 13: return ETCA_INS_CLZ; + case 14: return ETCA_INS_NOT; + case 15: return ETCA_INS_ANDN; + case 0x18: return ETCA_INS_LSB; + case 0x19: return ETCA_INS_LSBMSK; + case 0x1a: return ETCA_INS_RLSB; + case 0x1b: return ETCA_INS_ZHIB; + + /* md */ + case 0x10: return ETCA_INS_UDIV; + case 0x11: return ETCA_INS_SDIV; + case 0x12: return ETCA_INS_UREM; + case 0x13: return ETCA_INS_SREM; + case 0x14: return ETCA_INS_UMUL; + case 0x15: return ETCA_INS_SMUL; + case 0x16: return ETCA_INS_UHMUL; + case 0x17: return ETCA_INS_SHMUL; + } + // clang-format on + + return ETCA_INS_INVALID; +} + +static etca_insn parseBaseOpcode(uint8_t opc, bool imm) +{ + // clang-format off + switch (opc) + { + case 0: return ETCA_INS_ADD; + case 1: return ETCA_INS_SUB; + case 2: return ETCA_INS_RSUB; + case 3: return ETCA_INS_CMP; + case 4: return ETCA_INS_OR; + 
case 5: return ETCA_INS_XOR; + case 6: return ETCA_INS_AND; + case 7: return ETCA_INS_TEST; + case 8: return ETCA_INS_MOVZ; + case 9: return ETCA_INS_MOVS; + case 10: return ETCA_INS_LOAD; + case 11: return ETCA_INS_STORE; + case 12: return imm ? ETCA_INS_SLO : ETCA_INS_INVALID; + case 14: return imm ? ETCA_INS_READCR : ETCA_INS_INVALID; + case 15: return imm ? ETCA_INS_WRITECR : ETCA_INS_INVALID; + } + // clang-format on + + return ETCA_INS_INVALID; +} + +static bool parseCoreOp(DecodeIsntCtx *ctx, const uint8_t **code_p, + size_t *code_len_p, uint16_t *size) +{ + const uint8_t *code = *code_p; + size_t code_len = *code_len_p; + + if (code_len >= 3 && code[0] >> 4 == 2 + 4 + 8 /* 0b1110 */) { + uint16_t opc = (code[0] & 0xF) << 5; + opc |= (code[1] >> 7) << 4; + opc |= code[1] & 0xF; + ctx->insn = parseExopOpcode(opc); + if (ctx->insn == ETCA_INS_INVALID) + return false; + + ctx->ss = (code[1] >> 4) & 3; + + if (code[1] & (1 << 6)) { + parseRI(ctx, code[2], ctx->insn); + } else { + parseABM(ctx, code[2]); + } + + code += 3; + code_len -= 3; + (*size) += 3; + } else if (code_len >= 2 && code[0] == 0x2F && code[1] == 0x11) { + ctx->insn = ETCA_INS_WAIT; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x0F && code[1] == 0x11) { + ctx->insn = ETCA_INS_SYSCALL; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x1F && code[1] == 0x11) { + ctx->insn = ETCA_INS_ERET; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xF && (code[1] << 3) >> 3 == 0) { + ctx->insn = ETCA_INS_ALLOC_ZERO; + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xF && (code[1] << 3) >> 3 == 4) { + ctx->insn = ETCA_INS_DCACHE_INVALIDATE; + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; + + code 
+= 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x3F && code[1] == 0x11) { + ctx->insn = ETCA_INS_CACHE_INVALIDATE_ALL; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x8F && code[1] == 0x01) { + ctx->insn = ETCA_INS_CACHE_FLUSH_ALL; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xc) { + ctx->ss = (code[0] >> 4) & 3; + ctx->insn = ETCA_INS_POP; + parseABM(ctx, code[1]); + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 0 && + (code[0] & 0xF) == 0xd) { + ctx->ss = (code[0] >> 4) & 3; + ctx->insn = ETCA_INS_PUSH; + parseABM(ctx, code[1]); + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 6 == 1 && + (code[0] & 0xF) == 0xd) { + ctx->ss = (code[0] >> 4) & 3; + ctx->insn = ETCA_INS_PUSH; + parseRI(ctx, code[1], ctx->insn); + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0x9F && + ((code[1] >> 2) & 3) == 0) { + // clang-format off + switch (code[1] & 3) + { + case 0: ctx->insn = ETCA_INS_DATA_PREFETCH; break; + case 1: ctx->insn = ETCA_INS_INSTRUCTION_PREFETCH; break; + case 2: ctx->insn = ETCA_INS_DCACHE_FLUSH; break; + case 3: ctx->insn = ETCA_INS_DCACHE_INVALIDATE; break; + } + // clang-format on + + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] == 0xaf) { + if ((code[1] >> 4) & 1) + ctx->insn = ETCA_INS_ABS_CALL; + else + ctx->insn = ETCA_INS_ABS_JMP; + + if (ctx->pfx_cond.present) + return false; + + ctx->cond = code[1] & 0xF; + + ctx->single_reg.present = true; + ctx->single_reg.a = code[1] >> 5; + + code += 2; + code_len -= 2; + (*size) += 2; + } else if (code_len >= 2 && code[0] >> 4 == 0xb) { + ctx->insn = ETCA_INS_REL_CALL; + + uint64_t d = (code[0] & 0xF) << 8 | code[1]; + d = 
SignExtend64(d, 12);
+		ctx->rel.present = true;
+		ctx->rel.extended = d;
+
+		code += 2;
+		code_len -= 2;
+		(*size) += 2;
+	} else if (code_len >= 2 && code[0] >> 5 == 4) {
+		ctx->insn = ETCA_INS_REL_JMP;
+
+		if (ctx->pfx_cond.present)
+			return false;
+
+		ctx->cond = code[0] & 0xF;
+
+		uint64_t d = ((code[0] >> 4) & 1) << 8 | code[1];
+		d = SignExtend64(d, 9);
+		ctx->rel.present = true;
+		ctx->rel.extended = d;
+
+		code += 2;
+		code_len -= 2;
+		(*size) += 2;
+	} else if (code_len && code[0] >> 4 == 0xF &&
+		   code_len >= 1 + (1 << (code[0] & 3))) {
+		ctx->ss = code[0] & 3;
+		int sz = 1 << ctx->ss;
+
+		/*
+		 * NOTE(review): the target bytes are accumulated most
+		 * significant byte first - confirm this matches the byte
+		 * order the ISA uses for multi-byte immediates.
+		 */
+		uint64_t d = 0;
+		for (int i = 0; i < sz; i++) {
+			d <<= 8;
+			d |= code[i + 1];
+		}
+
+		// clang-format off
+		switch ((code[0] >> 2) & 3) {
+		case 0: ctx->insn = ETCA_INS_REL_JMP; break;
+		case 1: ctx->insn = ETCA_INS_ABS_JMP; break;
+		case 2: ctx->insn = ETCA_INS_REL_CALL; break;
+		case 3: ctx->insn = ETCA_INS_ABS_CALL; break;
+		}
+		// clang-format on
+
+		/* relative targets are signed; absolute targets stay as read */
+		if (ctx->insn == ETCA_INS_REL_JMP ||
+		    ctx->insn == ETCA_INS_REL_CALL)
+			d = SignExtend64(d, sz * 8);
+
+		/*
+		 * Store the operand only after the sign extension above;
+		 * storing it earlier dropped the extended value.
+		 */
+		ctx->rel.present = true;
+		ctx->rel.extended = d;
+
+		code += sz + 1;
+		code_len -= sz + 1;
+		(*size) += sz + 1;
+	} else if (code_len >= 2 && code[0] >> 7 == 0 &&
+		   code[0] >> 2 != 7 /* 0b111 */) {
+		/* bit 6 selects the register + imm5 (RI) operand byte */
+		const bool riFormat = code[0] & (1 << 6);
+		bool imm = riFormat;
+
+		if (!riFormat) {
+			parseABM(ctx, code[1]);
+
+			/* abm with fi is treated as imm */
+			if (ctx->abm.m == 1 &&
+			    (ctx->abm.b == 2 || ctx->abm.b == 3)) {
+				imm = true;
+			}
+		}
+
+		ctx->insn = parseBaseOpcode(code[0] & 0xF, imm);
+		if (ctx->insn == ETCA_INS_INVALID)
+			return false;
+
+		/*
+		 * The opcode must be resolved before the immediate is parsed:
+		 * parseRI() consults doesSignExtend(insn), which answers
+		 * false for ETCA_INS_INVALID, so parsing first would never
+		 * sign-extend the 5-bit immediate.
+		 */
+		if (riFormat)
+			parseRI(ctx, code[1], ctx->insn);
+
+		ctx->ss = (code[0] >> 4) & 3;
+
+		code += 2;
+		code_len -= 2;
+		(*size) += 2;
+	} else {
+		return false;
+	}
+
+	*code_len_p = code_len;
+	*code_p = code;
+
+	return true;
+}
+
+/* Fields of a SIB byte: scale in bits 7..6, index in bits 5..3, base in bits 2..0. */
+typedef struct {
+	uint8_t scale : 2;
+	uint8_t index : 3;
+	uint8_t base : 3;
+} sib_byte;
+
+/*
+ * Splits a SIB byte into its scale, index and base fields.
+ * index and base are 3 bits wide (see sib_byte), so they are masked with 7;
+ * masking with 3 made register numbers 4..7 unreachable.
+ */
+static sib_byte parseSib(uint8_t b)
+{
+	return (sib_byte){ b >> 6, (b >> 3) & 7, b & 7 };
+}
+
+/* Reads nb bytes from code as an unsigned integer, first byte most significant. */
+static uint64_t parseMultiByteUInt(const uint8_t *code, size_t nb)
+{
+	uint64_t imm = 0;
+	for (size_t i = 0; i < nb; i++) {
+		imm <<= 8;
+		imm |= code[i];
+	}
+	return imm;
+}
+
+/*
+ * Decodes the extra operand bytes selected by the ABM byte in ctx.
+ *
+ * info       receives the decoded operands (operands[0] / operands[1]).
+ * ptrWidthB  pointer width of the current mode, in bytes.
+ * ctx        decode context; abm and pfx_rex are read.
+ * code_p, code_len_p, size
+ *            cursor, remaining length and running instruction size; they are
+ *            only updated when the function returns true.
+ *
+ * Returns false when the bytes are truncated or the mode is not handled yet.
+ */
+static bool parseM(etca_info *info, size_t ptrWidthB, DecodeIsntCtx *ctx,
+		   const uint8_t **code_p, size_t *code_len_p, uint16_t *size)
+{
+	const uint8_t *code = *code_p;
+	size_t code_len = *code_len_p;
+
+	if (ctx->abm.m == 0) {
+		/* base */
+	} else if (ctx->abm.m == 1 && ctx->abm.b == 0 && ctx->abm.a != 0 &&
+		   ctx->abm.a != 4) {
+		/*
+		 * from mo2: "sib [, dP], i8" stores i8 to a memory operand.
+		 * The bits of A select the address components:
+		 *   bit 0 - displacement dP follows the sib byte
+		 *   bit 1 - sib.base is used
+		 *   bit 2 - 2^sib.scale * sib.index is used
+		 * which covers A = 1, 2, 3, 5, 6 and 7.
+		 */
+		const bool hasDisp = (ctx->abm.a & 1) != 0;
+		const bool hasBase = (ctx->abm.a & 2) != 0;
+		const bool hasIndex = (ctx->abm.a & 4) != 0;
+
+		/* a 64-bit displacement is only used with REX.Q, else 4 bytes */
+		size_t dPWidth = 0;
+		if (hasDisp) {
+			dPWidth = ptrWidthB;
+			if (dPWidth == 8 &&
+			    !(ctx->pfx_rex.present && ctx->pfx_rex.q))
+				dPWidth = 4;
+		}
+
+		/*
+		 * sib byte + displacement + i8. Exactly this many bytes are
+		 * checked AND consumed; consuming 2 + ptrWidthB regardless of
+		 * the checked width could underflow code_len and over-report
+		 * the instruction size.
+		 */
+		const size_t consumed = 1 + dPWidth + 1;
+		if (code_len < consumed)
+			return false;
+
+		sib_byte sib = parseSib(code[0]);
+
+		/*
+		 * NOTE(review): base/index are stored as raw 3-bit register
+		 * numbers, while register operands elsewhere are offset by
+		 * ETCA_REG_FIRST_BASE / ETCA_REG_FIRST_REX - confirm which
+		 * form cs_etca_op_mem is meant to hold.
+		 */
+		cs_etca_op_mem memop = { 0 };
+		if (hasBase) {
+			memop.base.enabled = true;
+			memop.base.base = sib.base;
+		}
+		if (hasIndex) {
+			memop.index.enabled = true;
+			memop.index.index = sib.index;
+			memop.index.index_multiplier_log2 = sib.scale;
+		}
+		if (hasDisp)
+			memop.displacement =
+				parseMultiByteUInt(&code[1], dPWidth);
+
+		info->op.operands[0].type = ETCA_OP_MEM;
+		info->op.operands[0].mem = memop;
+
+		/* the i8 is the last byte, right after the displacement */
+		info->op.operands[1].type = ETCA_OP_IMM;
+		info->op.operands[1].imm = code[1 + dPWidth];
+
+		code += consumed;
+		code_len -= consumed;
+		(*size) += consumed;
+	}
+	// TODO: finish mo2; mo1
+	else
+		return false;
+
+	*code_len_p = code_len;
+	*code_p = code;
+
+	return true;
+}
+
+// returns true if valid
+bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len,
+			 MCInst *mcInstr, uint16_t * /* out */ size,
+			 uint64_t address, void *infoIn)
+{
+	etca_info *info = infoIn;
+	// cs_detail *detail = mcInstr->flat_insn->detail;
+
+	size_t ptrWidthLog2;
+	// clang-format off
+	switch (mcInstr->csh->mode) {
+	case CS_MODE_ETCA16: ptrWidthLog2 = 1; break;
+	case CS_MODE_ETCA32: ptrWidthLog2 = 2; break;
+	case CS_MODE_ETCA64: ptrWidthLog2 = 3; break;
+	default: ptrWidthLog2 = 1; break;
+	}
+	// clang-format on
+	size_t ptrWidthB = 1 << ptrWidthLog2;
+
+	DecodeIsntCtx ctx = { 0 };
+	ctx.insn = ETCA_INS_INVALID;
+	ctx.cond = ETCA_COND_ALWAYS;
+	*size = 0;
+
+	/* conditional prefix */
+	if (code_len && code[0] >> 4 == 2 + 8 /* 0b1010 */ &&
+	    (code[0] & 0xF) < 14) {
+		// cond prefix of always / never isn't allowed
+		ctx.pfx_cond.present = true;
+		ctx.cond = code[0] & 0xF;
+		code++;
+		code_len--;
+		(*size)++;
+	}
+
+	/* register expansion prefix */
+	if (code_len && code[0] >> 4 == 4 + 8 /* 0b1100 */) {
+		ctx.pfx_rex.present = true;
+		ctx.pfx_rex.q = (code[0] >> 3) & 1;
+		ctx.pfx_rex.a = (code[0] >> 2) & 1;
+		ctx.pfx_rex.b = (code[0] >> 1) & 1;
+		ctx.pfx_rex.x = (code[0] >> 0) & 1;
+		code++;
+		code_len--;
+		(*size)++;
+	}
+
+	// if (code_len && code[0] >> 4 == 1 + 4 + 8 /* 0b1101
*/)
+	// unused prefix
+
+	if (!parseCoreOp(&ctx, &code, &code_len, size))
+		return false;
+
+	/*
+	 * An ABM byte with m == 1 and b == 2 / b == 3 announces a trailing
+	 * full immediate; the operand is then reported like a register +
+	 * immediate pair, with A as the register.
+	 */
+	if (ctx.abm.present && ctx.abm.m == 1 && ctx.abm.b == 2) {
+		/* 1B full immediate */
+		if (!(code_len >= 1))
+			return false;
+		ctx.abm.present = false;
+		ctx.ri.present = true;
+		ctx.ri.r = ctx.abm.a;
+		/*
+		 * NOTE(review): unlike parseRI(), this value is never passed
+		 * through doesSignExtend()/SignExtend64 - confirm whether the
+		 * 8-bit immediate should be sign-extended for signed opcodes.
+		 */
+		ctx.ri.imm = code[0];
+
+		code += 1;
+		code_len -= 1;
+		(*size) += 1;
+	} else if (ctx.abm.present && ctx.abm.m == 1 && ctx.abm.b == 3) {
+		/* nB full immediate */
+		size_t sz = 1 << ctx.ss;
+		/* an 8-byte immediate is only read with REX.Q, else 4 bytes */
+		if (sz == 8 && !(ctx.pfx_rex.present && ctx.pfx_rex.q))
+			sz = 4;
+
+		if (!(code_len >= sz))
+			return false;
+		ctx.abm.present = false;
+		ctx.ri.present = true;
+		ctx.ri.r = ctx.abm.a;
+		ctx.ri.imm = parseMultiByteUInt(code, sz);
+
+		code += sz;
+		code_len -= sz;
+		(*size) += sz;
+	}
+
+	/* publish the decoded instruction for the printer */
+	memset(info, 0, sizeof(*info));
+	info->op.cond = ctx.cond;
+	info->op.insn = ctx.insn;
+	info->op.ss = ctx.ss;
+
+	if (ctx.rel.present) {
+		info->op.op_count = 1;
+
+		cs_etca_op *rel = &info->op.operands[0];
+		rel->type = ETCA_OP_IMM;
+		rel->imm = ctx.rel.extended;
+	} else if (ctx.ri.present) {
+		info->op.op_count = 2;
+
+		info->op.operands[0].type = ETCA_OP_REG;
+		info->op.operands[0].reg =
+			((ctx.pfx_rex.present && ctx.pfx_rex.a) ?
+				 ETCA_REG_FIRST_REX :
+				 ETCA_REG_FIRST_BASE) +
+			ctx.ri.r;
+
+		info->op.operands[1].type = ETCA_OP_IMM;
+		info->op.operands[1].imm = ctx.ri.imm;
+	} else if (ctx.abm.present) {
+		info->op.op_count = 2;
+
+		info->op.operands[0].type = ETCA_OP_REG;
+		info->op.operands[0].reg =
+			((ctx.pfx_rex.present && ctx.pfx_rex.a) ?
+				 ETCA_REG_FIRST_REX :
+				 ETCA_REG_FIRST_BASE) +
+			ctx.abm.a;
+
+		/* the B register is extended by REX.b, not by REX.a */
+		info->op.operands[1].type = ETCA_OP_REG;
+		info->op.operands[1].reg =
+			((ctx.pfx_rex.present && ctx.pfx_rex.b) ?
+ ETCA_REG_FIRST_REX : + ETCA_REG_FIRST_BASE) + + ctx.abm.b; + + if (!parseM(info, ptrWidthB, &ctx, &code, &code_len, size)) + return false; + } else if (ctx.single_reg.present) { + info->op.op_count = 1; + + info->op.operands[0].type = ETCA_OP_REG; + info->op.operands[0].reg = + ((ctx.pfx_rex.present && ctx.pfx_rex.a) ? + ETCA_REG_FIRST_REX : + ETCA_REG_FIRST_BASE) + + ctx.single_reg.a; + } + + // TODO: add_group + + return true; +} + +#ifndef CAPSTONE_DIET +void Etca_reg_access(const cs_insn *insn, cs_regs regs_read, + uint8_t *regs_read_count, cs_regs regs_write, + uint8_t *regs_write_count) +{ + *regs_read_count = 0; + *regs_write_count = 0; + // TODO +} +#endif diff --git a/arch/Etca/EtcaDisassembler.h b/arch/Etca/EtcaDisassembler.h new file mode 100644 index 0000000000..3a9688471f --- /dev/null +++ b/arch/Etca/EtcaDisassembler.h @@ -0,0 +1,20 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifndef CS_ETCA_DISASSEMBLER_H +#define CS_ETCA_DISASSEMBLER_H + +#include "../../MCInst.h" + +typedef struct etca_info { + cs_etca op; +} etca_info; + +bool Etca_getInstruction(csh ud, const uint8_t *code, size_t code_len, + MCInst *instr, uint16_t *size, uint64_t address, + void *info); + +void Etca_reg_access(const cs_insn *insn, cs_regs regs_read, + uint8_t *regs_read_count, cs_regs regs_write, + uint8_t *regs_write_count); +#endif diff --git a/arch/Etca/EtcaInstPrinter.c b/arch/Etca/EtcaInstPrinter.c new file mode 100644 index 0000000000..d062d704b4 --- /dev/null +++ b/arch/Etca/EtcaInstPrinter.c @@ -0,0 +1,532 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#include "EtcaDisassembler.h" +#include "capstone/etca.h" +#include "../../Mapping.h" +#include "EtcaInstPrinter.h" + +const char *Etca_reg_name(csh handle, unsigned int reg) +{ +#ifdef CAPSTONE_DIET + return NULL; +#else + if (reg >= ETCA_REG_ENDING) + reg = ETCA_REG_INVALID; + + switch ((cs_etca_reg)reg) { + case ETCA_REG_INVALID: + case ETCA_REG_ENDING: + return 
""; + + case ETCA_REG_R0: + return "r0"; + case ETCA_REG_R1: + return "r1"; + case ETCA_REG_R2: + return "r2"; + case ETCA_REG_R3: + return "r3"; + case ETCA_REG_R4: + return "r4"; + case ETCA_REG_R5: + return "r5"; + case ETCA_REG_R6: + return "r6"; + case ETCA_REG_R7: + return "r7"; + + case ETCA_REG_R8: + return "r8"; + case ETCA_REG_R9: + return "r9"; + case ETCA_REG_R10: + return "r10"; + case ETCA_REG_R11: + return "r11"; + case ETCA_REG_R12: + return "r12"; + case ETCA_REG_R13: + return "r13"; + case ETCA_REG_R14: + return "r14"; + case ETCA_REG_R15: + return "r15"; + } +#endif +} + +void Etca_get_insn_id(cs_struct *h, cs_insn *insn, unsigned int id) +{ + insn->id = id; // These id's matches for etca +} + +const char *Etca_insn_name(csh handle, unsigned int id) +{ +#ifdef CAPSTONE_DIET + return NULL; +#else + if (id >= ETCA_INS_ENDING) + id = ETCA_INS_INVALID; + + switch ((etca_insn)id) { + case ETCA_INS_INVALID: + case ETCA_INS_ENDING: + return ""; + + case ETCA_INS_NOP: + return "nop"; + + case ETCA_INS_REL_JMP: + return "jmp"; + case ETCA_INS_ABS_JMP: + return "jmp"; + case ETCA_INS_REL_CALL: + return "call"; + case ETCA_INS_ABS_CALL: + return "call"; + + case ETCA_INS_ADD: + return "add"; + case ETCA_INS_SUB: + return "sub"; + case ETCA_INS_RSUB: + return "rsub"; + case ETCA_INS_CMP: + return "cmp"; + case ETCA_INS_OR: + return "or"; + case ETCA_INS_XOR: + return "xor"; + case ETCA_INS_AND: + return "and"; + case ETCA_INS_TEST: + return "test"; + case ETCA_INS_MOVZ: + return "movz"; + case ETCA_INS_MOVS: + return "movs"; + case ETCA_INS_LOAD: + return "load"; + case ETCA_INS_STORE: + return "store"; + case ETCA_INS_SLO: + return "slo"; + + case ETCA_INS_READCR: + return "readcr"; + case ETCA_INS_WRITECR: + return "writecr"; + case ETCA_INS_SYSCALL: + return "syscall"; + case ETCA_INS_ERET: + return "eret"; + case ETCA_INS_WAIT: + return "wait"; + + case ETCA_INS_PUSH: + return "push"; + case ETCA_INS_POP: + return "pop"; + case ETCA_INS_LEA: + return 
"lea"; + case ETCA_INS_ADC: + return "adc"; + case ETCA_INS_SBB: + return "sbb"; + case ETCA_INS_RSBB: + return "rsbb"; + case ETCA_INS_ASR: + return "asr"; + case ETCA_INS_ROL: + return "rol"; + case ETCA_INS_ROR: + return "ror"; + case ETCA_INS_SHL: + return "shl"; + case ETCA_INS_SHR: + return "shr"; + case ETCA_INS_RCL: + return "rcl"; + case ETCA_INS_RCR: + return "rcr"; + case ETCA_INS_POPCNT: + return "popcnt"; + case ETCA_INS_GREV: + return "grev"; + case ETCA_INS_CTZ: + return "ctz"; + case ETCA_INS_CLZ: + return "clz"; + case ETCA_INS_NOT: + return "not"; + case ETCA_INS_ANDN: + return "andn"; + case ETCA_INS_UDIV: + return "udiv"; + case ETCA_INS_SDIV: + return "sdiv"; + case ETCA_INS_UREM: + return "urem"; + case ETCA_INS_SREM: + return "srem"; + case ETCA_INS_UMUL: + return "umul"; + case ETCA_INS_SMUL: + return "smul"; + case ETCA_INS_UHMUL: + return "uhmul"; + case ETCA_INS_SHMUL: + return "shmul"; + case ETCA_INS_LSB: + return "lsb"; + case ETCA_INS_LSBMSK: + return "lsmsk"; + case ETCA_INS_RLSB: + return "rlsb"; + case ETCA_INS_ZHIB: + return "zhib"; + + case ETCA_INS_CACHE_FLUSH_ALL: + return "cache_flush_all"; + case ETCA_INS_DATA_PREFETCH: + return "data_prefetch"; + case ETCA_INS_INSTRUCTION_PREFETCH: + return "instruction_prefetch"; + case ETCA_INS_DCACHE_FLUSH: + return "dcache_flush"; + case ETCA_INS_ICACHE_INVALIDATE: + return "icache_invalidate"; + case ETCA_INS_CACHE_INVALIDATE_ALL: + return "cache_invalidate_all"; + case ETCA_INS_DCACHE_INVALIDATE: + return "dcache_invalidate"; + case ETCA_INS_ALLOC_ZERO: + return "alloc_zero"; + } +#endif +} + +const char *cs_etca_cond_name(uint8_t cond) +{ + switch (cond) { + case ETCA_COND_Z: + return "z"; + case ETCA_COND_N: + return "n"; + case ETCA_COND_C: + return "c"; + case ETCA_COND_O: + return "o"; + case ETCA_COND_BE: + return "be"; + case ETCA_COND_L: + return "l"; + case ETCA_COND_LE: + return "le"; + case ETCA_COND_ALWAYS: + return "always"; + + case ETCA_COND_NZ: + return "nz"; + case 
ETCA_COND_NN:
+		return "nn";
+	case ETCA_COND_NC:
+		return "nc";
+	case ETCA_COND_NO:
+		return "no";
+	case ETCA_COND_A:
+		return "a";
+	case ETCA_COND_GE:
+		return "ge";
+	case ETCA_COND_G:
+		/* every other name mirrors its enumerator; this one printed "u" */
+		return "g";
+	case ETCA_COND_NEVER:
+		return "never";
+
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Returns the printable name of a control register, or NULL when cr is not
+ * one of the listed ETCA_CR_* values.
+ */
+const char *cs_etca_cr_name(cs_etca_cr cr)
+{
+	switch (cr) {
+	case ETCA_CR_CPUID1:
+		return "cpuid1";
+	case ETCA_CR_CPUID2:
+		return "cpuid2";
+	case ETCA_CR_FEAT:
+		return "feat";
+	case ETCA_CR_FLAGS:
+		return "flags";
+	case ETCA_CR_INT_PC:
+		return "int_pc";
+	case ETCA_CR_INT_RET_PC:
+		return "int_ret_pc";
+	case ETCA_CR_INT_MASK:
+		return "int_mask";
+	case ETCA_CR_INT_PENDING:
+		return "int_pending";
+	case ETCA_CR_INT_CAUSE:
+		return "int_cause";
+	case ETCA_CR_INT_DATA:
+		return "int_data";
+	case ETCA_CR_INT_SCRATCH_0:
+		return "int_scratch_0";
+	case ETCA_CR_INT_SCRATCH_1:
+		return "int_scratch_1";
+	case ETCA_CR_PRIV:
+		return "priv";
+	case ETCA_CR_INT_RET_PRIV:
+		return "int_ret_priv";
+	case ETCA_CR_CACHE_LINE_SIZE:
+		return "cache_line_size";
+	case ETCA_CR_NO_CACHE_START:
+		return "no_cache_start";
+	case ETCA_CR_NO_CACHE_END:
+		return "no_cache_end";
+	case ETCA_CR_MODE:
+		return "mode";
+	}
+	return NULL;
+}
+
+#ifndef CAPSTONE_DIET
+/* Appends '%' followed by the register name to O. */
+static void printReg(SStream *O, cs_etca_reg reg)
+{
+	SStream_concat1(O, '%');
+	SStream_concat0(O, Etca_reg_name(0, reg));
+}
+#endif
+
+#ifndef CAPSTONE_DIET
+/*
+ * Appends a memory operand as "[base + scale*index + displacement]",
+ * leaving out the components that are disabled (or a zero displacement).
+ */
+static void printMemOp(SStream *O, const cs_etca_op_mem *op)
+{
+	/* true until the first component has been written */
+	bool first = true;
+	SStream_concat1(O, '[');
+
+	if (op->base.enabled) {
+		/*
+		 * The base counts as a printed component; without clearing
+		 * 'first' here the " + " before the index / displacement was
+		 * dropped and the terms ran together.
+		 */
+		first = false;
+		printReg(O, op->base.base);
+	}
+
+	if (op->index.enabled) {
+		if (!first) {
+			SStream_concat0(O, " + ");
+		}
+		first = false;
+
+		printUInt8(O, 1 << op->index.index_multiplier_log2);
+		SStream_concat1(O, '*');
+		printReg(O, op->index.index);
+	}
+
+	if (op->displacement) {
+		if (!first) {
+			SStream_concat0(O, " + ");
+		}
+		first = false;
+
+		printInt64(O, op->displacement);
+	}
+
+	SStream_concat1(O, ']');
+}
+#endif
+
+#ifndef
CAPSTONE_DIET +static void printOp(SStream *O, const cs_etca_op *op, etca_insn insn) +{ + switch (op->type) { + case ETCA_OP_INVALID: + SStream_concat0(O, ""); + break; + + case ETCA_OP_REG: + printReg(O, op->reg); + break; + + case ETCA_OP_IMM: + if ((int64_t)op->imm < 0 && + (insn == ETCA_INS_REL_JMP || insn == ETCA_INS_REL_CALL || + (int64_t)op->imm >= -63)) { + SStream_concat1(O, '-'); + printUInt64(O, -op->imm); + } else { + printUInt64(O, op->imm); + } + break; + + case ETCA_OP_MEM: + printMemOp(O, &op->mem); + break; + } +} +#endif + +#ifndef CAPSTONE_DIET +static bool isSizedInsn(etca_insn insn) +{ + switch (insn) { + case ETCA_INS_INVALID: + case ETCA_INS_NOP: + case ETCA_INS_ENDING: + case ETCA_INS_REL_JMP: + case ETCA_INS_ABS_JMP: + case ETCA_INS_REL_CALL: + case ETCA_INS_ABS_CALL: + case ETCA_INS_ERET: + case ETCA_INS_SYSCALL: + case ETCA_INS_WAIT: + case ETCA_INS_CACHE_FLUSH_ALL: + case ETCA_INS_DATA_PREFETCH: + case ETCA_INS_INSTRUCTION_PREFETCH: + case ETCA_INS_DCACHE_FLUSH: + case ETCA_INS_ICACHE_INVALIDATE: + case ETCA_INS_CACHE_INVALIDATE_ALL: + case ETCA_INS_DCACHE_INVALIDATE: + case ETCA_INS_ALLOC_ZERO: + return false; + + case ETCA_INS_ADD: + case ETCA_INS_SUB: + case ETCA_INS_RSUB: + case ETCA_INS_CMP: + case ETCA_INS_OR: + case ETCA_INS_XOR: + case ETCA_INS_AND: + case ETCA_INS_TEST: + case ETCA_INS_MOVZ: + case ETCA_INS_MOVS: + case ETCA_INS_LOAD: + case ETCA_INS_STORE: + case ETCA_INS_SLO: + case ETCA_INS_READCR: + case ETCA_INS_WRITECR: + case ETCA_INS_PUSH: + case ETCA_INS_POP: + case ETCA_INS_LEA: + case ETCA_INS_ADC: + case ETCA_INS_SBB: + case ETCA_INS_RSBB: + case ETCA_INS_ASR: + case ETCA_INS_ROL: + case ETCA_INS_ROR: + case ETCA_INS_SHL: + case ETCA_INS_SHR: + case ETCA_INS_RCL: + case ETCA_INS_RCR: + case ETCA_INS_POPCNT: + case ETCA_INS_GREV: + case ETCA_INS_CTZ: + case ETCA_INS_CLZ: + case ETCA_INS_NOT: + case ETCA_INS_ANDN: + case ETCA_INS_UDIV: + case ETCA_INS_SDIV: + case ETCA_INS_UREM: + case ETCA_INS_SREM: + case 
ETCA_INS_UMUL: + case ETCA_INS_SMUL: + case ETCA_INS_UHMUL: + case ETCA_INS_SHMUL: + case ETCA_INS_LSB: + case ETCA_INS_LSBMSK: + case ETCA_INS_RLSB: + case ETCA_INS_ZHIB: + return true; + } +} +#endif + +static inline bool op_is_reg(const cs_etca_op *op, cs_etca_reg reg) +{ + return op->type == ETCA_OP_REG && op->reg == reg; +} + +void Etca_printInst(MCInst *MI, SStream *O, void *infoIn) +{ +#ifndef CAPSTONE_DIET + etca_info *info = (etca_info *)infoIn; + + // first word in buffer has to be mnemonic because of SStream_extract_mnem_opstr!! + + if ((info->op.insn == ETCA_INS_ABS_JMP || + info->op.insn == ETCA_INS_REL_JMP) && + info->op.cond != ETCA_COND_ALWAYS) { + SStream_concat1(O, 'j'); + SStream_concat0(O, cs_etca_cond_name(info->op.cond)); + } else { + SStream_concat0(O, Etca_insn_name(0, info->op.insn)); + if (info->op.cond != ETCA_COND_ALWAYS) { + if (info->op.insn == ETCA_INS_ABS_CALL || + info->op.insn == ETCA_INS_REL_CALL) { + SStream_concat0( + O, cs_etca_cond_name(info->op.cond)); + } else { + SStream_concat( + O, " when %s, ", + cs_etca_cond_name(info->op.cond)); + } + } + } + + if (isSizedInsn(info->op.insn)) { + char s = "hxdq"[info->op.ss & 3]; + SStream_concat1(O, s); + } + + int numPrinted = 0; + for (int i = 0; i < info->op.op_count; i++) { + const cs_etca_op *op = &info->op.operands[i]; + + /* don't print sp reg if it's the default */ + if (i == 1 && info->op.insn == ETCA_INS_POP && + op_is_reg(op, ETCA_REG_R6)) + continue; + if (i == 0 && info->op.insn == ETCA_INS_PUSH && + op_is_reg(op, ETCA_REG_R6)) + continue; + + if (numPrinted != 0) { + SStream_concat0(O, ","); + } + SStream_concat0(O, " "); + + const char *crname; + if (i == 1 && + (info->op.insn == ETCA_INS_READCR || + info->op.insn == ETCA_INS_WRITECR) && + (crname = cs_etca_cr_name(op->imm))) { + SStream_concat0(O, crname); + } else { + printOp(O, op, info->op.insn); + } + + numPrinted++; + } +#endif +} + +const char *Etca_group_name(csh handle, unsigned int id) +{ +#ifndef 
CAPSTONE_DIET + /* any id at or beyond the sentinel is folded onto ETCA_GRP_ENDING, which maps to the empty name */ if (id >= ETCA_GRP_ENDING) + id = ETCA_GRP_ENDING; + + switch ((cs_etca_insn_group)id) { + case ETCA_GRP_ENDING: + case ETCA_GRP_INVALID: + return ""; + + case ETCA_GRP_JUMP: + return "jump"; + case ETCA_GRP_CALL: + return "call"; + case ETCA_GRP_PRIV: + return "privileged"; + } + return ""; /* every clamped id is handled by the switch, but an enum switch without default does not prove that to the compiler: make sure no path leaves this non-void function without a value */ +#else + return NULL; +#endif +} diff --git a/arch/Etca/EtcaInstPrinter.h b/arch/Etca/EtcaInstPrinter.h new file mode 100644 index 0000000000..41e407e486 --- /dev/null +++ b/arch/Etca/EtcaInstPrinter.h @@ -0,0 +1,22 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifndef CS_ETCA_INSTPRINTER_H +#define CS_ETCA_INSTPRINTER_H + +#include "capstone/capstone.h" +#include "../../utils.h" +#include "../../MCInst.h" +#include "../../SStream.h" +#include "../../cs_priv.h" +#include "EtcaDisassembler.h" + +struct SStream; + +void Etca_printInst(MCInst *MI, struct SStream *O, void *Info); +const char *Etca_reg_name(csh handle, unsigned int reg); +void Etca_get_insn_id(cs_struct *h, cs_insn *insn, unsigned int id); +const char *Etca_insn_name(csh handle, unsigned int id); +const char *Etca_group_name(csh handle, unsigned int id); + +#endif diff --git a/arch/Etca/EtcaModule.c b/arch/Etca/EtcaModule.c new file mode 100644 index 0000000000..84c3b30305 --- /dev/null +++ b/arch/Etca/EtcaModule.c @@ -0,0 +1,41 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifdef CAPSTONE_HAS_ETCA + +#include "../../cs_priv.h" +#include "EtcaDisassembler.h" +#include "EtcaInstPrinter.h" +#include "EtcaModule.h" + +cs_err Etca_global_init(cs_struct *ud) +{ + etca_info *info; + + info = cs_mem_calloc(1, sizeof(etca_info)); + if (!info) { + return CS_ERR_MEM; + } + + ud->printer = Etca_printInst; + ud->printer_info = info; + ud->getinsn_info = info; + ud->reg_name = Etca_reg_name; + ud->insn_id = Etca_get_insn_id; + ud->insn_name = Etca_insn_name; + ud->group_name = Etca_group_name; + ud->disasm = Etca_getInstruction; + ud->post_printer = NULL; +#ifndef
CAPSTONE_DIET + ud->reg_access = Etca_reg_access; +#endif + + return CS_ERR_OK; +} + +/* No ETC.a-specific runtime option is handled: every request is accepted and reported as CS_ERR_OK. */ cs_err Etca_option(cs_struct *handle, cs_opt_type type, size_t value) +{ + return CS_ERR_OK; +} + +#endif diff --git a/arch/Etca/EtcaModule.h b/arch/Etca/EtcaModule.h new file mode 100644 index 0000000000..b91669d57c --- /dev/null +++ b/arch/Etca/EtcaModule.h @@ -0,0 +1,12 @@ +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifndef CS_ETCA_MODULE_H +#define CS_ETCA_MODULE_H + +#include "../../utils.h" + +cs_err Etca_global_init(cs_struct *ud); +cs_err Etca_option(cs_struct *handle, cs_opt_type type, size_t value); + +#endif diff --git a/bindings/const_generator.py b/bindings/const_generator.py index b2f9cefebc..b626822886 100644 --- a/bindings/const_generator.py +++ b/bindings/const_generator.py @@ -8,7 +8,7 @@ include = ['arm.h', 'aarch64.h', 'm68k.h', 'mips.h', 'x86.h', 'ppc.h', 'sparc.h', 'systemz.h', 'xcore.h', 'tms320c64x.h', 'm680x.h', 'evm.h', 'mos65xx.h', 'wasm.h', 'bpf.h', 'riscv.h', 'sh.h', 'tricore.h', - 'alpha.h', 'hppa.h', 'loongarch.h', 'arc.h', 'xtensa.h'] + 'alpha.h', 'hppa.h', 'loongarch.h', 'arc.h', 'xtensa.h', 'etca.h'] template = { 'java': { @@ -28,6 +28,7 @@ 'tms320c64x.h': 'TMS320C64x', 'm680x.h': 'M680x', 'evm.h': 'Evm', + 'etca.h': 'Etca', 'wasm.h': 'Wasm', 'comment_open': '\t//', 'comment_close': '', diff --git a/cs.c b/cs.c index 80ade811fb..b8791e5d2f 100644 --- a/cs.c +++ b/cs.c @@ -78,6 +78,7 @@ #include "arch/LoongArch/LoongArchModule.h" #include "arch/Xtensa/XtensaModule.h" #include "arch/ARC/ARCModule.h" +#include "arch/Etca/EtcaModule.h" typedef struct cs_arch_config { // constructor initialization @@ -219,6 +220,10 @@ typedef struct cs_arch_config { CS_MODE_TRICORE_162 | CS_MODE_TRICORE_180 | \ CS_MODE_LITTLE_ENDIAN), \ } +#define CS_ARCH_CONFIG_ETCA \ + { Etca_global_init, Etca_option, /* third field = modes to reject: */ \ + ~(CS_MODE_LITTLE_ENDIAN | CS_MODE_ETCA16 | CS_MODE_ETCA32 | \ + CS_MODE_ETCA64) } #define CS_ARCH_CONFIG_ALPHA \ { \ ALPHA_global_init, ALPHA_option, \ @@ -367,6 +372,11 @@ static const cs_arch_config
arch_configs[MAX_ARCH] = { #else { NULL, NULL, 0 }, #endif +#ifdef CAPSTONE_HAS_ETCA + CS_ARCH_CONFIG_ETCA, +#else + { NULL, NULL, 0 }, +#endif }; // bitmask of enabled architectures @@ -422,6 +432,9 @@ static const uint32_t all_arch = 0 #ifdef CAPSTONE_HAS_SH | (1 << CS_ARCH_SH) #endif +#ifdef CAPSTONE_HAS_ETCA + | (1 << CS_ARCH_ETCA) +#endif #ifdef CAPSTONE_HAS_TRICORE | (1 << CS_ARCH_TRICORE) #endif @@ -690,7 +703,7 @@ bool CAPSTONE_API cs_support(int query) (1 << CS_ARCH_SH) | (1 << CS_ARCH_TRICORE) | (1 << CS_ARCH_ALPHA) | (1 << CS_ARCH_HPPA) | (1 << CS_ARCH_LOONGARCH) | (1 << CS_ARCH_XTENSA) | - (1 << CS_ARCH_ARC)); + (1 << CS_ARCH_ARC) | (1 << CS_ARCH_ETCA)); if ((unsigned int)query < CS_ARCH_MAX) return all_arch & (1 << query); @@ -999,6 +1012,8 @@ static uint8_t skipdata_size(cs_struct *handle) return 4; case CS_ARCH_SH: return 2; + case CS_ARCH_ETCA: + return 1; case CS_ARCH_TRICORE: // TriCore instruction's length can be 2 or 4 bytes, // so we just skip 2 bytes @@ -1783,6 +1798,12 @@ int CAPSTONE_API cs_op_count(csh ud, const cs_insn *insn, unsigned int op_type) (mos65xx_op_type)op_type) count++; break; + case CS_ARCH_ETCA: + for (i = 0; i < insn->detail->etca.op_count; i++) + if (insn->detail->etca.operands[i].type == + (cs_etca_op_type)op_type) + count++; + break; case CS_ARCH_WASM: for (i = 0; i < insn->detail->wasm.op_count; i++) if (insn->detail->wasm.operands[i].type == @@ -2031,6 +2052,15 @@ int CAPSTONE_API cs_op_index(csh ud, const cs_insn *insn, unsigned int op_type, return i; } break; + case CS_ARCH_ETCA: + for (i = 0; i < insn->detail->etca.op_count; i++) { + if (insn->detail->etca.operands[i].type == + (cs_etca_op_type)op_type) + count++; + if (count == post) + return i; + } + break; case CS_ARCH_ALPHA: for (i = 0; i < insn->detail->alpha.op_count; i++) { if (insn->detail->alpha.operands[i].type == diff --git a/cstool/cstool.c b/cstool/cstool.c index c341f7fb0f..32e11647fd 100644 --- a/cstool/cstool.c +++ b/cstool/cstool.c @@ -339,6 +339,10 
@@ static struct { { "evm", "ethereum virtual machine", CS_ARCH_EVM, 0 }, + { "etca16", "ETC.a, 16 bit pointers", CS_ARCH_ETCA, CS_MODE_16 }, + { "etca32", "ETC.a, 32 bit pointers", CS_ARCH_ETCA, CS_MODE_32 }, + { "etca64", "ETC.a, 64 bit pointers", CS_ARCH_ETCA, CS_MODE_64 }, + { "wasm", "web assembly", CS_ARCH_WASM, 0 }, { "bpf", "Classic BPF, little endian", CS_ARCH_BPF, @@ -504,6 +508,8 @@ static const char *get_arch_name(cs_arch arch) return "M680X"; case CS_ARCH_EVM: return "Evm"; + case CS_ARCH_ETCA: + return "Etca"; case CS_ARCH_MOS65XX: return "MOS65XX"; case CS_ARCH_WASM: @@ -620,6 +626,9 @@ static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins) case CS_ARCH_EVM: print_insn_detail_evm(handle, ins); break; + case CS_ARCH_ETCA: + print_insn_detail_etca(handle, ins); + break; case CS_ARCH_WASM: print_insn_detail_wasm(handle, ins); break; @@ -803,6 +812,10 @@ int main(int argc, char **argv) printf("evm=1 "); } + if (cs_support(CS_ARCH_ETCA)) { + printf("etca=1 "); + } + if (cs_support(CS_ARCH_WASM)) { printf("wasm=1 "); } diff --git a/cstool/cstool.h b/cstool/cstool.h index 2d005bc9a0..9d42c99f8a 100644 --- a/cstool/cstool.h +++ b/cstool/cstool.h @@ -24,5 +24,6 @@ void print_insn_detail_hppa(csh handle, cs_insn *ins); void print_insn_detail_loongarch(csh handle, cs_insn *ins); void print_insn_detail_xtensa(csh handle, cs_insn *ins); void print_insn_detail_arc(csh handle, cs_insn *ins); +void print_insn_detail_etca(csh handle, cs_insn *ins); #endif //CAPSTONE_CSTOOL_CSTOOL_H_ diff --git a/cstool/cstool_etca.c b/cstool/cstool_etca.c new file mode 100644 index 0000000000..82731eb475 --- /dev/null +++ b/cstool/cstool_etca.c @@ -0,0 +1,14 @@ +#include <stdio.h> +#include <stdlib.h> + +#include <capstone/capstone.h> +#include "cstool.h" + +/* cstool detail printer for ETC.a: currently prints nothing, it only guards against a missing detail record. */ void print_insn_detail_etca(csh handle, cs_insn *ins) +{ + // detail can be NULL on "data" instruction if SKIPDATA option is turned ON + if (ins->detail == NULL) + return; + + // TODO +} diff --git a/include/capstone/capstone.h
b/include/capstone/capstone.h index f8d07338e1..b26511e3cd 100644 --- a/include/capstone/capstone.h +++ b/include/capstone/capstone.h @@ -105,6 +105,7 @@ typedef enum cs_arch { CS_ARCH_LOONGARCH, ///< LoongArch architecture CS_ARCH_XTENSA, ///< Xtensa architecture CS_ARCH_ARC, ///< ARC architecture + CS_ARCH_ETCA, ///< ETC.a architecture CS_ARCH_MAX, CS_ARCH_ALL = 0xFFFF, // All architectures - for cs_support() } cs_arch; @@ -226,6 +227,9 @@ typedef enum cs_mode { CS_MODE_SH4A = 1 << 5, ///< SH4A CS_MODE_SHFPU = 1 << 6, ///< w/ FPU CS_MODE_SHDSP = 1 << 7, ///< w/ DSP + CS_MODE_ETCA16 = CS_MODE_16, + CS_MODE_ETCA32 = CS_MODE_32, + CS_MODE_ETCA64 = CS_MODE_64, CS_MODE_TRICORE_110 = 1 << 1, ///< Tricore 1.1 CS_MODE_TRICORE_120 = 1 << 2, ///< Tricore 1.2 CS_MODE_TRICORE_130 = 1 << 3, ///< Tricore 1.3 @@ -439,6 +443,7 @@ typedef struct cs_opt_skipdata { #include "mos65xx.h" #include "bpf.h" #include "sh.h" +#include "etca.h" #include "tricore.h" #include "alpha.h" #include "hppa.h" @@ -497,6 +502,7 @@ typedef struct cs_detail { cs_bpf bpf; ///< Berkeley Packet Filter architecture (including eBPF) cs_riscv riscv; ///< RISCV architecture cs_sh sh; ///< SH architecture + cs_etca etca; ///< ETC.a architecture cs_tricore tricore; ///< TriCore architecture cs_alpha alpha; ///< Alpha architecture cs_hppa hppa; ///< HPPA architecture diff --git a/include/capstone/etca.h b/include/capstone/etca.h new file mode 100644 index 0000000000..bb2202fc5c --- /dev/null +++ b/include/capstone/etca.h @@ -0,0 +1,232 @@ +#ifndef CAPSTONE_ETCA_H +#define CAPSTONE_ETCA_H + +/* Capstone Disassembly Engine */ +/* By Alexander Nutz, 2025 */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "platform.h" +#include "cs_operand.h" + +#ifdef _MSC_VER +// "anonymous unions are a non-standard extension" +#pragma warning(disable : 4201) +#endif + +/// ETCA registers and special registers +typedef enum { + ETCA_REG_INVALID = 0, + + ETCA_REG_FIRST_BASE, + ETCA_REG_R0 = ETCA_REG_FIRST_BASE, + 
ETCA_REG_R1, + ETCA_REG_R2, + ETCA_REG_R3, + ETCA_REG_R4, + ETCA_REG_R5, + ETCA_REG_R6, + ETCA_REG_R7, + ETCA_REG_LAST_BASE = ETCA_REG_R7, + + ETCA_REG_FIRST_REX, + ETCA_REG_R8 = ETCA_REG_FIRST_REX, + ETCA_REG_R9, + ETCA_REG_R10, + ETCA_REG_R11, + ETCA_REG_R12, + ETCA_REG_R13, + ETCA_REG_R14, + ETCA_REG_R15, + ETCA_REG_LAST_REX = ETCA_REG_R15, + + ETCA_REG_ENDING, +} cs_etca_reg; + +/* Operand kinds; each value aliases the generic CS_OP_* constant of the same kind. */ typedef enum { + ETCA_OP_INVALID = CS_OP_INVALID, + ETCA_OP_REG = CS_OP_REG, // register operand + ETCA_OP_IMM = CS_OP_IMM, // (possibly full-) immediate operand + ETCA_OP_MEM = CS_OP_MEM, // only for MO1/MO2 memory operands +} cs_etca_op_type; + +// index + base + displacement +typedef struct { + // (1 << index_multiplier_log2) * reg[index] + struct { + bool enabled; + cs_etca_reg index; + // 2^0=1, 2^1=2, 2^2=4, 2^3=8 + uint8_t index_multiplier_log2 : 2; + } index; + + // reg[base] + struct { + bool enabled; + cs_etca_reg base; + } base; + + int64_t displacement; +} cs_etca_op_mem; + +// Instruction operand +typedef struct { + cs_etca_op_type type; + /* exactly one member is meaningful, selected by type */ union { + uint64_t imm; // when ETCA_OP_IMM; after sign extensions + cs_etca_reg reg; // when ETCA_OP_REG + cs_etca_op_mem mem; // when ETCA_OP_MEM + }; +} cs_etca_op; + +/* Instruction identifiers; ETCA_INS_INVALID is 0 and ETCA_INS_ENDING closes the list. */ typedef enum etca_insn { + ETCA_INS_INVALID = 0, + + ETCA_INS_NOP, + + ETCA_INS_REL_JMP, + ETCA_INS_ABS_JMP, + ETCA_INS_REL_CALL, + ETCA_INS_ABS_CALL, + + ETCA_INS_ADD, + ETCA_INS_SUB, + ETCA_INS_RSUB, + ETCA_INS_CMP, + ETCA_INS_OR, + ETCA_INS_XOR, + ETCA_INS_AND, + ETCA_INS_TEST, + ETCA_INS_MOVZ, + ETCA_INS_MOVS, + ETCA_INS_LOAD, + ETCA_INS_STORE, + ETCA_INS_SLO, + + ETCA_INS_READCR, + ETCA_INS_WRITECR, + ETCA_INS_SYSCALL, + ETCA_INS_ERET, + ETCA_INS_WAIT, + + ETCA_INS_PUSH, + ETCA_INS_POP, + ETCA_INS_LEA, + ETCA_INS_ADC, + ETCA_INS_SBB, + ETCA_INS_RSBB, + ETCA_INS_ASR, + ETCA_INS_ROL, + ETCA_INS_ROR, + ETCA_INS_SHL, + ETCA_INS_SHR, + ETCA_INS_RCL, + ETCA_INS_RCR, + ETCA_INS_POPCNT, + ETCA_INS_GREV, + ETCA_INS_CTZ, + ETCA_INS_CLZ, + ETCA_INS_NOT, + ETCA_INS_ANDN, +
ETCA_INS_UDIV, + ETCA_INS_SDIV, + ETCA_INS_UREM, + ETCA_INS_SREM, + ETCA_INS_UMUL, + ETCA_INS_SMUL, + ETCA_INS_UHMUL, + ETCA_INS_SHMUL, + ETCA_INS_LSB, + ETCA_INS_LSBMSK, + ETCA_INS_RLSB, + ETCA_INS_ZHIB, + + ETCA_INS_CACHE_FLUSH_ALL, + ETCA_INS_DATA_PREFETCH, + ETCA_INS_INSTRUCTION_PREFETCH, + ETCA_INS_DCACHE_FLUSH, + ETCA_INS_ICACHE_INVALIDATE, + ETCA_INS_CACHE_INVALIDATE_ALL, + ETCA_INS_DCACHE_INVALIDATE, + ETCA_INS_ALLOC_ZERO, + + ETCA_INS_ENDING, +} etca_insn; + +#define ETCA_MAX_NUM_OP 2 /* number of slots in cs_etca.operands[] */ + +// bit 0 (the least-significant bit) negates the cond +#define ETCA_COND_Z (0 << 1) +#define ETCA_COND_N (1 << 1) +#define ETCA_COND_C (2 << 1) +#define ETCA_COND_O (3 << 1) +#define ETCA_COND_BE (4 << 1) +#define ETCA_COND_L (5 << 1) +#define ETCA_COND_LE (6 << 1) +#define ETCA_COND_ALWAYS (7 << 1) + +/* aliases of the codes above */ #define ETCA_COND_E ETCA_COND_Z +#define ETCA_COND_B ETCA_COND_C + +/* negated forms: the same code with bit 0 set */ #define ETCA_COND_NZ (1 | ETCA_COND_Z) +#define ETCA_COND_NN (1 | ETCA_COND_N) +#define ETCA_COND_NC (1 | ETCA_COND_C) +#define ETCA_COND_NO (1 | ETCA_COND_O) +#define ETCA_COND_A (1 | ETCA_COND_BE) +#define ETCA_COND_GE (1 | ETCA_COND_L) +#define ETCA_COND_G (1 | ETCA_COND_LE) +#define ETCA_COND_NEVER (1 | ETCA_COND_ALWAYS) + +#define ETCA_COND_NE (1 | ETCA_COND_E) +#define ETCA_COND_AE (1 | ETCA_COND_C) + +/* Name of a condition code as used in mnemonics by the instruction printer; definition is not in this header. */ char const *cs_etca_cond_name(uint8_t cond); + +/* Control-register numbers, 0x00 through 0x11. */ typedef enum { + ETCA_CR_CPUID1 = 0x00, + ETCA_CR_CPUID2 = 0x01, + ETCA_CR_FEAT = 0x02, + ETCA_CR_FLAGS = 0x03, + ETCA_CR_INT_PC = 0x04, + ETCA_CR_INT_RET_PC = 0x05, + ETCA_CR_INT_MASK = 0x06, + ETCA_CR_INT_PENDING = 0x07, + ETCA_CR_INT_CAUSE = 0x08, + ETCA_CR_INT_DATA = 0x09, + ETCA_CR_INT_SCRATCH_0 = 0x0A, + ETCA_CR_INT_SCRATCH_1 = 0x0B, + ETCA_CR_PRIV = 0x0C, + ETCA_CR_INT_RET_PRIV = 0x0D, + ETCA_CR_CACHE_LINE_SIZE = 0x0E, + ETCA_CR_NO_CACHE_START = 0x0F, + ETCA_CR_NO_CACHE_END = 0x10, + ETCA_CR_MODE = 0x11, +} cs_etca_cr; + +/* Name of a control register; the instruction printer treats a null result as "no name" -- presumably returned for unknown numbers, confirm in the definition. */ char const *cs_etca_cr_name(cs_etca_cr cr); + +/// Instruction structure +typedef struct cs_etca { + etca_insn insn : 8; + uint8_t ss : 2; + uint8_t cond
: 4; + uint8_t op_count : 2; + /* only the first op_count entries are meaningful -- NOTE(review): a 2-bit op_count can encode 3 while there are ETCA_MAX_NUM_OP slots; confirm the decoder never stores more */ cs_etca_op operands[ETCA_MAX_NUM_OP]; +} cs_etca; + +// instructions can have multiple groups +/* ETCA_GRP_INVALID is 0 and ETCA_GRP_ENDING closes the list. */ typedef enum { + ETCA_GRP_INVALID = 0, + ETCA_GRP_JUMP, + ETCA_GRP_CALL, + ETCA_GRP_PRIV, + ETCA_GRP_ENDING, +} cs_etca_insn_group; + +#ifdef __cplusplus +} +#endif + +#endif