From d8ac5bc03fdfd09cfae1d6e8484fc68ae79c1761 Mon Sep 17 00:00:00 2001 From: Herman Semenov Date: Sun, 1 Jun 2025 00:50:25 +0300 Subject: [PATCH] all: reduce field types and reorder struct fields to fill padding holes on 64-bit Running pahole, the struct/class memory layout analyzer from Red Hat (https://linux.die.net/man/1/pahole), on the object files after compilation shows which structures contain padding holes that are problematic for the CPU cache. C/C++ compilers do not reorder members to fill and align such holes automatically, because changing the packing of a struct can break program behavior; this is left to the programmer, and there are keywords for packing structures explicitly. Reduce type sizes: section-align int -> int16_t section-fileindex uint32_t -> uint16_t Reduce structure sizes: section 168 160 saved 8 bytes (by reducing type sizes) itemplate 80 72 saved 8 bytes coff_Section 112 96 saved 16 bytes cv8_symbol 40 32 saved 8 bytes cv8_state 152 144 saved 8 bytes Symbol 56 48 saved 8 bytes --- NOTE(review): in the output/outmacho.c hunk below, section.align is still declared int and section.fileindex changes from int32_t to uint32_t, which does not match the type reductions listed above - confirm which is intended. asm/labels.c | 2 +- asm/preproc.c | 43 ++++++++++++++++++++++--------------------- disasm/disasm.c | 5 +++-- include/nasm.h | 12 ++++++------ output/codeview.c | 21 ++++++++++----------- output/outaout.c | 2 +- output/outbin.c | 3 ++- output/outcoff.c | 2 +- output/outdbg.c | 2 +- output/outelf.c | 2 +- output/outelf.h | 2 +- output/outieee.c | 8 ++++---- output/outlib.h | 2 +- output/outmacho.c | 33 +++++++++++++++++---------------- output/outobj.c | 2 +- output/pecoff.h | 10 +++++----- 16 files changed, 77 insertions(+), 74 deletions(-) diff --git a/asm/labels.c b/asm/labels.c index f6b940fca..a0e888e85 100644 --- a/asm/labels.c +++ b/asm/labels.c @@ -115,9 +115,9 @@ union label { /* actual label structures */ enum label_type type, mangled_type; } defn; struct { - int32_t movingon; int64_t dummy; union label *next; + int32_t movingon; } admin; }; diff --git a/asm/preproc.c b/asm/preproc.c index 00c0294a0..8653b7755 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -219,9 +219,9 @@ enum sparmflags { struct smac_param { Token name; + const
Token *def; /* Default, if any */ enum sparmflags flags; char radix; /* Radix type for SPARM_EVAL */ - const Token *def; /* Default, if any */ }; struct SMacro { @@ -285,15 +285,10 @@ struct MMacro { #endif char *name; int nparam_min, nparam_max; - enum nolist_flags nolist; /* is this macro listing-inhibited? */ - bool casesense; - bool plus; /* is the last parameter greedy? */ - bool capture_label; /* macro definition has %00; capture label */ int32_t in_progress; /* is this macro currently being expanded? */ int32_t max_depth; /* maximum number of recursive expansions allowed */ Token *dlist; /* All defaults as one list */ Token **defaults; /* Parameter default pointers */ - int ndefs; /* number of default parameters */ Line *expansion; struct mstk mstk; /* Macro expansion stack */ @@ -310,6 +305,12 @@ struct MMacro { struct debug_macro_def *def; /* Definition */ struct debug_macro_inv *inv; /* Current invocation (if any) */ } dbg; + + int ndefs; /* number of default parameters */ + enum nolist_flags nolist; /* is this macro listing-inhibited? */ + bool casesense; + bool plus; /* is the last parameter greedy? 
*/ + bool capture_label; /* macro definition has %00; capture label */ }; @@ -7595,28 +7596,28 @@ stdmac_map(const SMacro *s, Token **params, int nparam) /* Add magic standard macros */ struct magic_macros { const char *name; - bool casesense; + ExpandSMacro func; int nparam; enum sparmflags flags; - ExpandSMacro func; + bool casesense; }; static void pp_add_magic_stdmac(void) { static const struct magic_macros magic_macros[] = { - { "__?FILE?__", true, 0, 0, stdmac_file }, - { "__?LINE?__", true, 0, 0, stdmac_line }, - { "__?BITS?__", true, 0, 0, stdmac_bits }, - { "__?PTR?__", true, 0, 0, stdmac_ptr }, - { "%abs", false, 1, SPARM_EVAL, stdmac_abs }, - { "%count", false, 1, SPARM_VARADIC, stdmac_count }, - { "%eval", false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join }, - { "%map", false, 1, SPARM_VARADIC, stdmac_map }, - { "%str", false, 1, SPARM_GREEDY|SPARM_STR, stdmac_join }, - { "%strcat", false, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_strcat }, - { "%strlen", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_strlen }, - { "%tok", false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_tok }, - { NULL, false, 0, 0, NULL } + { "__?FILE?__", stdmac_file, 0, 0, true }, + { "__?LINE?__", stdmac_line, 0, 0, true }, + { "__?BITS?__", stdmac_bits, 0, 0, true }, + { "__?PTR?__", stdmac_ptr, 0, 0, true }, + { "%abs", stdmac_abs, 1, SPARM_EVAL, false }, + { "%count", stdmac_count, 1, SPARM_VARADIC, false }, + { "%eval", stdmac_join, 1, SPARM_EVAL|SPARM_VARADIC, false }, + { "%map", stdmac_map, 1, SPARM_VARADIC, false }, + { "%str", stdmac_join, 1, SPARM_GREEDY|SPARM_STR, false }, + { "%strcat", stdmac_strcat, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, false }, + { "%strlen", stdmac_strlen, 1, SPARM_STR|SPARM_CONDQUOTE, false }, + { "%tok", stdmac_tok, 1, SPARM_STR|SPARM_CONDQUOTE, false }, + { NULL, NULL, 0, 0, false } }; const struct magic_macros *m; SMacro tmpl; diff --git a/disasm/disasm.c b/disasm/disasm.c index 6273f87a1..943fec6b0 100644 --- a/disasm/disasm.c +++ 
b/disasm/disasm.c @@ -76,6 +76,9 @@ * Prefix information */ struct prefix_info { + uint32_t rex; /* REX prefix present */ + uint8_t evex[3]; /* EVEX prefix present */ + uint8_t osize; /* Operand size */ uint8_t asize; /* Address size */ uint8_t osp; /* Operand size prefix present */ @@ -89,8 +92,6 @@ struct prefix_info { uint8_t vex_m; /* VEX.M field */ uint8_t vex_v; uint8_t vex_lp; /* VEX.LP fields */ - uint32_t rex; /* REX prefix present */ - uint8_t evex[3]; /* EVEX prefix present */ }; #define getu8(x) (*(uint8_t *)(x)) diff --git a/include/nasm.h b/include/nasm.h index 171242938..55e70c285 100644 --- a/include/nasm.h +++ b/include/nasm.h @@ -60,10 +60,10 @@ extern const char *_progname; /* Time stamp for the official start of compilation */ struct compile_time { time_t t; - bool have_local, have_gm, have_posix; int64_t posix; struct tm local; struct tm gm; + bool have_local, have_gm, have_posix; }; extern struct compile_time official_compile_time; @@ -314,10 +314,10 @@ struct tokenval { char *t_charptr; int64_t t_integer; int64_t t_inttwo; + const char *t_start; /* Pointer to token in input buffer */ + int t_len; /* Length of token in input buffer */ enum token_type t_type; int8_t t_flag; - const char *t_start; /* Pointer to token in input buffer */ - int t_len; /* Length of token in input buffer */ }; typedef int (*scanner)(void *private_data, struct tokenval *tv); @@ -760,15 +760,15 @@ typedef struct insn { /* an instruction itself */ int eops_float; /* true if DD and floating */ int32_t times; /* repeat count (TIMES prefix) */ bool rex_done; /* REX prefix emitted? 
*/ + uint8_t evex_p[3]; /* EVEX.P0: [RXB,R',00,mm], P1: [W,vvvv,1,pp] */ + /* EVEX.P2: [z,L'L,b,V',aaa] */ int rex; /* Special REX Prefix */ int vexreg; /* Register encoded in VEX prefix */ int vex_cm; /* Class and M field for VEX prefix */ int vex_wlp; /* W, P and L information for VEX prefix */ - uint8_t evex_p[3]; /* EVEX.P0: [RXB,R',00,mm], P1: [W,vvvv,1,pp] */ - /* EVEX.P2: [z,L'L,b,V',aaa] */ - enum ttypes evex_tuple; /* Tuple type for compressed Disp8*N */ int evex_rm; /* static rounding mode for AVX512 (EVEX) */ struct operand *evex_brerop; /* BR/ER/SAE operand position */ + enum ttypes evex_tuple; /* Tuple type for compressed Disp8*N */ } insn; /* Instruction flags type: IF_* flags are defined in insns.h */ diff --git a/output/codeview.c b/output/codeview.c index d1011fcb3..9548f6f40 100644 --- a/output/codeview.c +++ b/output/codeview.c @@ -82,15 +82,16 @@ struct source_file; struct source_file { const char *filename; char *fullname; - uint32_t fullnamelen; struct source_file *next; + uint32_t fullnamelen; + uint32_t filetbl_off; uint32_t sourcetbl_off; - struct SAA *lines; uint32_t num_lines; + struct SAA *lines; unsigned char md5sum[MD5_HASHBYTES]; }; @@ -110,13 +111,8 @@ enum symbol_type { }; struct cv8_symbol { - enum symbol_type type; char *name; - - uint32_t secrel; - uint16_t section; - uint32_t size; - uint32_t typeindex; + enum symbol_type type; enum symtype { TYPE_UNREGISTERED = 0x0000, /* T_NOTYPE */ @@ -132,14 +128,17 @@ struct cv8_symbol { TYPE_REAL256= 0x0044, TYPE_REAL512= 0x0045 } symtype; + + uint32_t secrel; + uint32_t size; + uint32_t typeindex; + uint16_t section; }; struct cv8_state { int symbol_sect; int type_sect; - uint32_t text_offset; - struct source_file *source_files, **source_files_tail; const char *last_filename; struct source_file *last_source_file; @@ -147,7 +146,7 @@ struct cv8_state { unsigned num_files; uint32_t total_filename_len; - + uint32_t text_offset; unsigned total_lines; struct SAA *symbols; diff --git 
a/output/outaout.c b/output/outaout.c index c59c78965..ed6b01c4b 100644 --- a/output/outaout.c +++ b/output/outaout.c @@ -74,10 +74,10 @@ struct Symbol { int32_t value; /* address, or COMMON variable size */ int32_t size; /* size for data or function exports */ int32_t segment; /* back-reference used by gsym_reloc */ + int32_t symnum; /* index into symbol table */ struct Symbol *next; /* list of globals in each section */ struct Symbol *nextfwd; /* list of unresolved-size symbols */ char *name; /* for unresolved-size symbols */ - int32_t symnum; /* index into symbol table */ }; /* diff --git a/output/outbin.c b/output/outbin.c index 1522e5cbd..1af1b6495 100644 --- a/output/outbin.c +++ b/output/outbin.c @@ -115,7 +115,6 @@ static struct Section { int64_t length; /* section length in bytes */ /* Section attributes */ - int flags; /* see flag definitions above */ uint64_t align; /* section alignment */ uint64_t valign; /* notional section alignment */ uint64_t start; /* section start address */ @@ -130,6 +129,8 @@ static struct Section { struct Section *prev; /* Points to previous section (implicit follows). */ struct Section *next; /* This links sections with a defined start address. */ + int flags; /* see flag definitions above */ + /* The extended bin format allows for sections to have a "virtual" * start address. 
This is accomplished by creating two sections: * one beginning at the Load Memory Address and the other beginning diff --git a/output/outcoff.c b/output/outcoff.c index c2b4eb6e3..d04144651 100644 --- a/output/outcoff.c +++ b/output/outcoff.c @@ -822,8 +822,8 @@ static void coff_sect_write(struct coff_Section *sect, typedef struct tagString { struct tagString *next; - int len; char *String; + int len; } STRING; #define EXPORT_SECTION_NAME ".drectve" diff --git a/output/outdbg.c b/output/outdbg.c index e7a9a4e5e..b58dc7179 100644 --- a/output/outdbg.c +++ b/output/outdbg.c @@ -52,8 +52,8 @@ struct Section { struct Section *next; - int32_t number; char *name; + int32_t number; } *dbgsect; static unsigned long dbg_max_data_dump = 128; diff --git a/output/outelf.c b/output/outelf.c index ad8d2104d..a6138b5bf 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -116,8 +116,8 @@ static int sec_debug; struct symlininfo { int offset; int section; /* index into sects[] */ - int segto; /* internal section number */ char *name; /* shallow-copied pointer of section name */ + int segto; /* internal section number */ }; struct linelist { diff --git a/output/outelf.h b/output/outelf.h index fcb91db51..26b93a2ac 100644 --- a/output/outelf.h +++ b/output/outelf.h @@ -118,7 +118,6 @@ struct elf_section { uint64_t nrelocs; int32_t index; /* NASM index or NO_SEG if internal */ int shndx; /* ELF index */ - int type; /* SHT_* */ uint64_t align; /* alignment: power of two */ uint64_t flags; /* section flags */ int64_t pass_last_seen; @@ -128,6 +127,7 @@ struct elf_section { struct elf_reloc *head; struct elf_reloc **tail; struct rbtree *gsyms; /* global symbols in section */ + int type; /* SHT_* */ }; #endif /* OUTPUT_OUTELF_H */ diff --git a/output/outieee.c b/output/outieee.c index 7ba903666..828dc5823 100644 --- a/output/outieee.c +++ b/output/outieee.c @@ -164,6 +164,10 @@ struct ieeeObjData { struct ieeeFixupp { struct ieeeFixupp *next; + int32_t id1; + int32_t id2; + int32_t 
offset; + int32_t addend; enum { FT_SEG = 0, FT_REL = 1, @@ -175,10 +179,6 @@ struct ieeeFixupp { FT_EXTSEG = 7 } ftype; int16_t size; - int32_t id1; - int32_t id2; - int32_t offset; - int32_t addend; }; static int32_t ieee_entry_seg, ieee_entry_ofs; diff --git a/output/outlib.h b/output/outlib.h index 7f6a78932..00eb4a28f 100644 --- a/output/outlib.h +++ b/output/outlib.h @@ -116,13 +116,13 @@ struct ol_symhead { }; struct ol_sect { - uint32_t flags; /* Section/symbol flags */ struct ol_sect *next; /* Next section in declared order */ const char *name; /* Name of section */ struct ol_symhead syml; /* All symbols in this section */ struct ol_symhead symg; /* Global symbols in this section */ struct SAA *data; /* Contents of section */ struct SAA *reloc; /* Section relocations */ + uint32_t flags; /* Section/symbol flags */ uint32_t index; /* Primary section index */ uint32_t subindex; /* Current subsection index */ }; diff --git a/output/outmacho.c b/output/outmacho.c index 1e776f52f..74fd4a345 100644 --- a/output/outmacho.c +++ b/output/outmacho.c @@ -112,21 +112,10 @@ static void fwriteptr(uint64_t data, FILE * fp) } struct section { - /* nasm internal data */ - struct section *next; - struct SAA *data; - int32_t index; /* Main section index */ - int32_t subsection; /* Current subsection index */ - int32_t fileindex; - struct reloc *relocs; - struct rbtree *syms[2]; /* All/global symbols symbols in section */ - int align; - bool by_name; /* This section was specified by full MachO name */ - char namestr[34]; /* segment,section as a C string */ - /* data that goes into the file */ - char sectname[16]; /* what this section is called */ - char segname[16]; /* segment this section will be in */ + char sectname[16]; /* what this section is called */ + char segname[16]; /* segment this section will be in */ + uint64_t addr; /* in-memory address (subject to alignment) */ uint64_t size; /* in-memory and -file size */ uint64_t offset; /* in-file offset */ @@ -134,6 
+123,18 @@ struct section { uint32_t nreloc; /* relocation entry count */ uint32_t flags; /* type and attributes (masked) */ uint32_t extreloc; /* external relocations */ + + /* nasm internal data */ + struct section *next; + struct SAA *data; + int32_t index; /* Main section index */ + int32_t subsection; /* Current subsection index */ + struct reloc *relocs; + struct rbtree *syms[2]; /* All/global symbols symbols in section */ + char namestr[34]; /* segment,section as a C string */ + int align; /* align changed int -> int16_t for reduce cache size and fill mem holes */ + uint32_t fileindex; + bool by_name; /* This section was specified by full MachO name */ }; #define S_NASM_TYPE_MASK 0x800004ff /* we consider these bits "section type" */ @@ -300,8 +301,8 @@ struct file_list { struct file_list *next; struct file_list *last; const char *file_name; - uint32_t file; struct dir_list *dir; + uint32_t file; }; struct dw_sect_list { @@ -309,9 +310,9 @@ struct dw_sect_list { int32_t section; uint32_t line; uint64_t offset; - uint32_t file; struct dw_sect_list *next; struct dw_sect_list *last; + uint32_t file; }; struct section_info { diff --git a/output/outobj.c b/output/outobj.c index 281839d0e..83f0834d5 100644 --- a/output/outobj.c +++ b/output/outobj.c @@ -613,8 +613,8 @@ static struct ImpDef { struct ImpDef *next; char *extname; char *libname; - unsigned int impindex; char *impname; + unsigned int impindex; } *imphead, **imptail; static struct ExpDef { diff --git a/output/pecoff.h b/output/pecoff.h index b99bed09c..b41723329 100644 --- a/output/pecoff.h +++ b/output/pecoff.h @@ -476,24 +476,24 @@ struct coff_Section { struct SAA *data; + struct coff_Reloc *head, **tail; uint32_t len; int nrelocs; int32_t index; - struct coff_Reloc *head, **tail; uint32_t flags; /* section flags */ uint32_t align_flags; /* user-specified alignment flags */ uint32_t sectalign_flags; /* minimum alignment from sectalign */ char *name; + int64_t pass_last_seen; int32_t namepos; /* Offset 
of name into the strings table */ int32_t pos, relpos; - int64_t pass_last_seen; /* comdat-related members */ - char *comdat_name; uint32_t checksum; /* set only for comdat sections */ + char *comdat_name; + int32_t comdat_associated; /* associated section for selection==5 */ int8_t comdat_selection; int8_t comdat_symbol; /* is the "comdat name" in symbol table? */ - int32_t comdat_associated; /* associated section for selection==5 */ }; struct coff_Reloc { @@ -509,11 +509,11 @@ struct coff_Reloc { }; struct coff_Symbol { - char name[9]; int32_t strpos; /* string table position of name */ int32_t value; /* address, or COMMON variable size */ int section; /* section number where it's defined * - in COFF codes, not NASM codes */ + char name[9]; bool is_global; /* is it a global symbol or not? */ int16_t type; /* 0 - notype, 0x20 - function */ int32_t namlen; /* full name length */