Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions COMPLIANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ This document tracks compliance gaps and non-standard behaviors.
## Implemented Features

### Core Language
- Basic types: `int`, `char`, `void`, `_Bool`
- Basic types: `int`, `short`, `char`, `void`, `_Bool`
- Structures and unions with nested definitions
- Enumerations with automatic value assignment
- Function definitions and declarations
Expand Down Expand Up @@ -58,7 +58,6 @@ This document tracks compliance gaps and non-standard behaviors.

| Feature | Status | Notes |
|---------|--------|-------|
| `short` | Missing | Only 4-byte integers |
| `long` | Missing | Only 4-byte integers |
| `long long` | Missing | No 64-bit integers |
| `unsigned` | Missing | All integers are signed |
Expand Down
40 changes: 27 additions & 13 deletions src/arm-codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,10 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
elf_offset += 24;
return;
case OP_trunc:
elf_offset += 4;
if (ph2_ir->src1 == 2)
elf_offset += 8;
else
elf_offset += 4;
return;
case OP_sign_ext:
elf_offset += 4;
Expand Down Expand Up @@ -261,6 +264,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
case OP_read:
if (ph2_ir->src1 == 1)
emit(__lb(__AL, rd, rn, 0));
else if (ph2_ir->src1 == 2)
emit(__lh(__AL, rd, rn, 0));
else if (ph2_ir->src1 == 4)
emit(__lw(__AL, rd, rn, 0));
else
Expand All @@ -269,6 +274,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
case OP_write:
if (ph2_ir->dest == 1)
emit(__sb(__AL, rm, rn, 0));
else if (ph2_ir->dest == 2)
emit(__sh(__AL, rm, rn, 0));
else if (ph2_ir->dest == 4)
emit(__sw(__AL, rm, rn, 0));
else
Expand Down Expand Up @@ -432,20 +439,27 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
emit(__mov_i(__EQ, rd, 1));
return;
case OP_trunc:
if (rm == 1)
rm = 0xFF;
else if (rm == 2)
rm = 0xFFFF;
else if (rm == 4)
rm = 0xFFFFFFFF;
else
if (rm == 1) {
emit(__and_i(__AL, rd, rn, 0xFF));
} else if (rm == 2) {
emit(__sll_amt(__AL, 0, logic_ls, rd, rn, 16));
emit(__sll_amt(__AL, 0, logic_rs, rd, rd, 16));
} else if (rm == 4) {
emit(__mov_r(__AL, rd, rn));
} else {
fatal("Unsupported truncation operation with invalid target size");

emit(__and_i(__AL, rd, rn, rm));
}
return;
case OP_sign_ext:
/* TODO: Support sign extension to types other than int */
emit(__sxtb(__AL, rd, rn, 0));
case OP_sign_ext: {
/* Decode source size from upper 16 bits */
int source_size = (rm >> 16) & 0xFFFF;
if (source_size == 2) {
emit(__sxth(__AL, rd, rn, 0));
} else {
/* For other cases, use byte extension (original behavior) */
emit(__sxtb(__AL, rd, rn, 0));
}
}
return;
case OP_cast:
/* Generic cast operation - for now, just move the value */
Expand Down
54 changes: 54 additions & 0 deletions src/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,39 @@ int __zero(int rd)
return __mov_i(__AL, rd, 0);
}

/* Build the instruction word for an ARM halfword load/store that uses an
 * immediate offset.
 *
 * The operand field handed to arm_encode() is assembled as:
 *   bits[11:8] = imm4H, the upper 4 bits of the offset magnitude
 *   bits[7:4]  = 0b1011 for an unsigned halfword, 0b1111 for a signed one
 *   bits[3:0]  = imm4L, the lower 4 bits of the offset magnitude
 *
 * @cond:      condition code, forwarded unchanged to arm_encode()
 * @l:         added to the opcode value; __lh passes 1 (load) and __sh
 *             passes 0 (store)
 * @rn:        forwarded to arm_encode() (presumably the base register -
 *             confirm against arm_encode)
 * @rd:        forwarded to arm_encode() (presumably the register being
 *             loaded or stored - confirm against arm_encode)
 * @ofs:       signed offset; a negative value is encoded as its magnitude
 *             with the opcode value lowered by 8
 * @signed_op: shifted into bit 6 of the operand field; 1 turns the 0b1011
 *             nibble into 0b1111
 *
 * Reports "Halfword offset too large" through error() when the magnitude
 * does not fit in 8 bits.
 *
 * Return: whatever arm_encode() produces for the assembled fields.
 */
int arm_halfword_transfer(arm_cond_t cond,
                          int l,
                          arm_reg rn,
                          arm_reg rd,
                          int ofs,
                          int signed_op)
{
    /* Contribution of the offset direction to the opcode value: 8 for a
     * non-negative offset, 0 for a negative one (presumably the U bit).
     */
    int direction = 8;
    int magnitude = ofs;

    if (ofs < 0) {
        direction = 0;
        magnitude = -ofs;
    }

    if (magnitude > 255)
        error("Halfword offset too large");

    /* Move offset bits 7:4 up to bits 11:8; bits 3:0 stay where they are */
    int high_nibble = (magnitude & 0xF0) << 4;
    int low_nibble = magnitude & 0x0F;

    /* 0xB0 supplies the fixed 1011 pattern; bit 6 upgrades it to 1111 */
    int operand = high_nibble | low_nibble | 0xB0 | (signed_op << 6);

    return arm_encode(cond, 16 + direction + 4 + l, rn, rd, operand);
}

int arm_transfer(arm_cond_t cond,
int l,
int size,
Expand Down Expand Up @@ -302,6 +335,18 @@ int __sb(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
return arm_transfer(cond, 0, 1, rn, rd, ofs);
}

/* Encode a signed halfword load (LDRSH) of rd from rn with immediate offset
 * ofs. Delegates to arm_halfword_transfer() with the load flag and the
 * signed flag both set.
 */
int __lh(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    int load_flag = 1;
    int signed_flag = 1;

    return arm_halfword_transfer(cond, load_flag, rn, rd, ofs, signed_flag);
}

/* Encode a halfword store (STRH) of rd to rn with immediate offset ofs.
 * Delegates to arm_halfword_transfer() with the load flag and the signed
 * flag both cleared.
 */
int __sh(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
{
    int load_flag = 0;
    int signed_flag = 0;

    return arm_halfword_transfer(cond, load_flag, rn, rd, ofs, signed_flag);
}

int __stmdb(arm_cond_t cond, int w, arm_reg rn, int reg_list)
{
return arm_encode(cond, arm_stmdb + (0x2 << 6) + (w << 1), rn, 0, reg_list);
Expand Down Expand Up @@ -373,3 +418,12 @@ int __sxtb(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
return arm_encode(cond, 106, 0xF, rd,
rm | ((rotation >> 3) << 10) | (0x7 << 4));
}

/* Encode SXTH: sign-extend a halfword taken from rm into rd.
 *
 * @rotation must be 0, 8, 16 or 24; any other value is reported through
 * fatal(). The rotation is divided by 8 and placed at bits 11:10 of the
 * operand field, next to the fixed 0b0111 pattern at bits 7:4.
 */
int __sxth(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
{
    int rotation_ok =
        rotation == 0 || rotation == 8 || rotation == 16 || rotation == 24;

    if (!rotation_ok)
        fatal("SXTH rotation must be 0, 8, 16, or 24");

    int rotate_field = (rotation >> 3) << 10;
    int fixed_bits = 0x7 << 4;

    return arm_encode(cond, 107, 0xF, rd, rm | rotate_field | fixed_bits);
}
1 change: 1 addition & 0 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ typedef enum {
TYPE_void = 0,
TYPE_int,
TYPE_char,
TYPE_short,
TYPE_struct,
TYPE_union,
TYPE_typedef
Expand Down
1 change: 1 addition & 0 deletions src/globals.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ type_t *TY_void;
type_t *TY_char;
type_t *TY_bool;
type_t *TY_int;
type_t *TY_short;

/* Arenas */

Expand Down
50 changes: 46 additions & 4 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,18 @@ var_t *promote_unchecked(block_t *block,
{
var_t *rd = require_typed_ptr_var(block, target_type, target_ptr);
gen_name_to(rd->var_name);
add_insn(block, *bb, OP_sign_ext, rd, var, NULL,
target_ptr ? PTR_SIZE : target_type->size, NULL);
/* Encode both source and target sizes in src1:
* Lower 16 bits: target size
* Upper 16 bits: source size
* This allows codegen to distinguish between different promotion types
* without changing IR semantics.
*/
int encoded_size = ((var->type->size) << 16);
if (target_ptr)
encoded_size |= PTR_SIZE;
else
encoded_size |= target_type->size;
add_insn(block, *bb, OP_sign_ext, rd, var, NULL, encoded_size, NULL);
return rd;
}

Expand Down Expand Up @@ -1598,6 +1608,9 @@ void handle_single_dereference(block_t *parent, basic_block_t **bb)
case TYPE_char:
sz = TY_char->size;
break;
case TYPE_short:
sz = TY_short->size;
break;
case TYPE_int:
sz = TY_int->size;
break;
Expand Down Expand Up @@ -1678,6 +1691,9 @@ void handle_multiple_dereference(block_t *parent, basic_block_t **bb)
case TYPE_char:
sz = TY_char->size;
break;
case TYPE_short:
sz = TY_short->size;
break;
case TYPE_int:
sz = TY_int->size;
break;
Expand Down Expand Up @@ -1980,6 +1996,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
add_insn(parent, *bb, OP_load_constant, compound_var, NULL,
NULL, 0, NULL);
} else if (cast_or_literal_type->base_type == TYPE_int ||
cast_or_literal_type->base_type == TYPE_short ||
cast_or_literal_type->base_type == TYPE_char) {
/* Handle empty compound literals */
if (lex_peek(T_close_curly, NULL)) {
Expand Down Expand Up @@ -2266,6 +2283,8 @@ int get_pointer_element_size(var_t *ptr_var)
switch (ptr_var->type->base_type) {
case TYPE_char:
return TY_char->size;
case TYPE_short:
return TY_short->size;
case TYPE_int:
return TY_int->size;
case TYPE_void:
Expand All @@ -2280,6 +2299,8 @@ int get_pointer_element_size(var_t *ptr_var)
switch (ptr_var->type->base_type) {
case TYPE_char:
return TY_char->size;
case TYPE_short:
return TY_short->size;
case TYPE_int:
return TY_int->size;
case TYPE_void:
Expand Down Expand Up @@ -2388,6 +2409,9 @@ void handle_pointer_arithmetic(block_t *parent,
case TYPE_char:
element_size = 1;
break;
case TYPE_short:
element_size = 2;
break;
case TYPE_int:
element_size = 4;
break;
Expand All @@ -2406,6 +2430,9 @@ void handle_pointer_arithmetic(block_t *parent,
case TYPE_char:
element_size = 1;
break;
case TYPE_short:
element_size = 2;
break;
case TYPE_int:
element_size = 4;
break;
Expand Down Expand Up @@ -2865,6 +2892,9 @@ void read_lvalue(lvalue_t *lvalue,
case TYPE_char:
lvalue->size = TY_char->size;
break;
case TYPE_short:
lvalue->size = TY_short->size;
break;
case TYPE_int:
lvalue->size = TY_int->size;
break;
Expand Down Expand Up @@ -3096,6 +3126,9 @@ void read_lvalue(lvalue_t *lvalue,
case TYPE_char:
increment_size = TY_char->size;
break;
case TYPE_short:
increment_size = TY_short->size;
break;
case TYPE_int:
increment_size = TY_int->size;
break;
Expand Down Expand Up @@ -3431,6 +3464,9 @@ bool read_body_assignment(char *token,
case TYPE_char:
increment_size = TY_char->size;
break;
case TYPE_short:
increment_size = TY_short->size;
break;
case TYPE_int:
increment_size = TY_int->size;
break;
Expand Down Expand Up @@ -4237,7 +4273,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
*/
if (expr_result && expr_result->array_size > 0 &&
!var->ptr_level && var->array_size == 0 && var->type &&
var->type->base_type == TYPE_int &&
(var->type->base_type == TYPE_int ||
var->type->base_type == TYPE_short) &&
expr_result->var_name[0] == '.') {
var_t *first_elem = require_var(parent);
first_elem->type = var->type;
Expand Down Expand Up @@ -4525,7 +4562,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
/* Handle array compound literal to scalar assignment */
if (expr_result && expr_result->array_size > 0 &&
!var->ptr_level && var->array_size == 0 && var->type &&
var->type->base_type == TYPE_int &&
(var->type->base_type == TYPE_int ||
var->type->base_type == TYPE_short) &&
expr_result->var_name[0] == '.') {
/* Extract first element from compound literal array */
var_t *first_elem = require_var(parent);
Expand Down Expand Up @@ -5210,6 +5248,10 @@ void parse_internal(void)
TY_int->base_type = TYPE_int;
TY_int->size = 4;

TY_short = add_named_type("short");
TY_short->base_type = TYPE_short;
TY_short->size = 2;

/* The builtin type _Bool was introduced in the C99 specification; it is
* better known through the macro bool, which is defined in <stdbool.h> (in
* shecc, it is defined in 'lib/c.c').
Expand Down
2 changes: 2 additions & 0 deletions src/reg-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ void reg_alloc(void)
if (global_insn->rd->ptr_level)
GLOBAL_FUNC->stack_size += PTR_SIZE;
else if (global_insn->rd->type != TY_int &&
global_insn->rd->type != TY_short &&
global_insn->rd->type != TY_char &&
global_insn->rd->type != TY_bool) {
GLOBAL_FUNC->stack_size +=
Expand Down Expand Up @@ -585,6 +586,7 @@ void reg_alloc(void)
case OP_allocat:
if ((insn->rd->type == TY_void ||
insn->rd->type == TY_int ||
insn->rd->type == TY_short ||
insn->rd->type == TY_char ||
insn->rd->type == TY_bool) &&
insn->rd->array_size == 0)
Expand Down
Loading