From ab9de461a62dd152992143923b31b7bceee431f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Aug 2025 07:56:40 +0200 Subject: [PATCH 1/3] Share some binary syntax code between JIT back-ends Break out code for binary construction and matching that do not depend on the architecture (x86_64 or AArch64). --- erts/emulator/Makefile.in | 1 + erts/emulator/beam/beam_file.c | 1 + erts/emulator/beam/jit/arm/instr_bs.cpp | 471 +++--------------------- erts/emulator/beam/jit/beam_jit_bs.cpp | 442 ++++++++++++++++++++++ erts/emulator/beam/jit/beam_jit_bs.hpp | 107 ++++++ erts/emulator/beam/jit/x86/instr_bs.cpp | 438 +--------------------- 6 files changed, 599 insertions(+), 861 deletions(-) create mode 100644 erts/emulator/beam/jit/beam_jit_bs.cpp create mode 100644 erts/emulator/beam/jit/beam_jit_bs.hpp diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 5d6c12c8a544..4e098373c8b1 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -1056,6 +1056,7 @@ JIT_OBJS = \ $(OBJDIR)/asm_load.o \ $(OBJDIR)/beam_asm_global.o \ $(OBJDIR)/beam_asm_module.o \ + $(OBJDIR)/beam_jit_bs.o \ $(OBJDIR)/beam_jit_common.o \ $(OBJDIR)/beam_jit_main.o \ $(OBJDIR)/beam_jit_metadata.o \ diff --git a/erts/emulator/beam/beam_file.c b/erts/emulator/beam/beam_file.c index af807d97d06f..e65f05661019 100644 --- a/erts/emulator/beam/beam_file.c +++ b/erts/emulator/beam/beam_file.c @@ -1490,6 +1490,7 @@ Eterm beamfile_get_literal(const BeamFile *beam, Sint index) { /* Static literal indexes are checked when parsing operations, and dynamic * literals are managed by us, so we can only land here through a bug. */ + erts_printf("%ld\n", index); ERTS_INTERNAL_ERROR("illegal literal index"); } diff --git a/erts/emulator/beam/jit/arm/instr_bs.cpp b/erts/emulator/beam/jit/arm/instr_bs.cpp index 2c0c0633ff1b..d00314af75db 100644 --- a/erts/emulator/beam/jit/arm/instr_bs.cpp +++ b/erts/emulator/beam/jit/arm/instr_bs.cpp @@ -21,6 +21,7 @@ */ #include "beam_asm.hpp" +#include "beam_jit_bs.hpp" #include extern "C" @@ -1080,121 +1081,6 @@ void BeamGlobalAssembler::emit_get_sint64_shared() { } } -struct BscSegment { - BscSegment() - : type(am_false), unit(1), flags(0), src(ArgNil()), size(ArgNil()), - error_info(0), offsetInAccumulator(0), effectiveSize(-1), - action(action::DIRECT) { - } - - Eterm type; - Uint unit; - Uint flags; - ArgVal src; - ArgVal size; - - Uint error_info; - Uint offsetInAccumulator; - Sint effectiveSize; - - /* Here are sub actions for storing integer segments. - * - * We use the ACCUMULATE action to accumulator values of segments - * with known, small sizes (no more than 64 bits) into an - * accumulator register. - * - * When no more segments can be accumulated, the STORE action is - * used to store the value of the accumulator into the binary. - * - * The DIRECT action is used when it is not possible to use the - * accumulator (for unknown or too large sizes). - */ - enum class action { DIRECT, ACCUMULATE, STORE } action; -}; - -static std::vector bs_combine_segments( - const std::vector segments) { - std::vector segs; - - for (auto seg : segments) { - switch (seg.type) { - case am_integer: { - if (!(0 < seg.effectiveSize && seg.effectiveSize <= 64)) { - /* Unknown or too large size. Handle using the default - * DIRECT action. */ - segs.push_back(seg); - continue; - } - - if (seg.flags & BSF_LITTLE || segs.size() == 0 || - segs.back().action == BscSegment::action::DIRECT) { - /* There are no previous compatible ACCUMULATE / STORE - * actions. Create the first ones. */ - seg.action = BscSegment::action::ACCUMULATE; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - continue; - } - - auto prev = segs.back(); - if (prev.flags & BSF_LITTLE) { - /* Little-endian segments cannot be combined with other - * segments. Create new ACCUMULATE / STORE actions. */ - seg.action = BscSegment::action::ACCUMULATE; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - continue; - } - - /* The current segment is compatible with the previous - * segment. Try combining them. */ - if (prev.effectiveSize + seg.effectiveSize <= 64) { - /* The combined values of the segments fit in the - * accumulator. Insert an ACCUMULATE action for the - * current segment before the pre-existing STORE - * action. */ - segs.pop_back(); - prev.effectiveSize += seg.effectiveSize; - seg.action = BscSegment::action::ACCUMULATE; - segs.push_back(seg); - segs.push_back(prev); - } else { - /* The size exceeds 64 bits. Can't combine. */ - seg.action = BscSegment::action::ACCUMULATE; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - } - break; - } - default: - segs.push_back(seg); - break; - } - } - - /* Calculate bit offsets for each ACCUMULATE segment. */ - - Uint offset = 0; - for (int i = segs.size() - 1; i >= 0; i--) { - switch (segs[i].action) { - case BscSegment::action::STORE: - offset = 64 - segs[i].effectiveSize; - break; - case BscSegment::action::ACCUMULATE: - segs[i].offsetInAccumulator = offset; - offset += segs[i].effectiveSize; - break; - default: - break; - } - } - - return segs; -} - /* * In: * bin_offset = register to store the bit offset into the binary @@ -1584,7 +1470,6 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, const Span &args) { Uint num_bits = 0; Uint estimated_num_bits = 0; - std::size_t n = args.size(); std::vector segments; Label error; /* Intentionally uninitialized */ ArgWord Live = Live0; @@ -1592,49 +1477,9 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, Sint allocated_size = -1; bool need_error_handler = false; - /* - * Collect information about each segment and calculate sizes of - * fixed segments. - */ - for (std::size_t i = 0; i < n; i += 6) { - BscSegment seg; - JitBSCOp bsc_op; - Uint bsc_segment; - - seg.type = args[i].as().get(); - bsc_segment = args[i + 1].as().get(); - seg.unit = args[i + 2].as().get(); - seg.flags = args[i + 3].as().get(); - seg.src = args[i + 4]; - seg.size = args[i + 5]; - - switch (seg.type) { - case am_float: - bsc_op = BSC_OP_FLOAT; - break; - case am_integer: - bsc_op = BSC_OP_INTEGER; - break; - case am_utf8: - bsc_op = BSC_OP_UTF8; - break; - case am_utf16: - bsc_op = BSC_OP_UTF16; - break; - case am_utf32: - bsc_op = BSC_OP_UTF32; - break; - default: - bsc_op = BSC_OP_BITSTRING; - break; - } - - /* - * Save segment number and operation for use in extended - * error information. - */ - seg.error_info = beam_jit_set_bsc_segment_op(bsc_segment, bsc_op); + segments = beam_jit_bsc_init(args); + for (auto &seg : segments) { /* * Test whether we can omit the code for the error handler. */ @@ -1713,8 +1558,6 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, sizeReg = ARG8; need_error_handler = true; } - - segments.insert(segments.end(), seg); } if (need_error_handler && Fail.get() != 0) { @@ -2159,7 +2002,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, } a.str(ARG1, TMP_MEM1q); - segments = bs_combine_segments(segments); + segments = beam_jit_bsc_combine_segments(segments); /* Keep track of the bit offset from the being of the binary. * Set to -1 if offset is not known (when a segment of unknown @@ -2296,6 +2139,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, break; case am_integer: switch (seg.action) { + case BscSegment::action::ACCUMULATE_FIRST: case BscSegment::action::ACCUMULATE: { /* Shift an integer of known size (no more than 64 bits) * into a word-size accumulator. */ @@ -2673,32 +2517,6 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, * Here follows the bs_match instruction and friends. */ -struct BsmSegment { - BsmSegment() - : action(action::TEST_HEAP), live(ArgNil()), size(0), unit(1), - flags(0), dst(ArgXRegister(0)){}; - - enum class action { - TEST_HEAP, - ENSURE_AT_LEAST, - ENSURE_EXACTLY, - READ, - EXTRACT_BITSTRING, - EXTRACT_INTEGER, - GET_INTEGER, - GET_BITSTRING, - SKIP, - DROP, - GET_TAIL, - EQ - } action; - ArgVal live; - Uint size; - Uint unit; - Uint flags; - ArgRegister dst; -}; - void BeamModuleAssembler::emit_read_bits(Uint bits, const a64::Gp bin_base, const a64::Gp bin_offset, @@ -3113,175 +2931,6 @@ void BeamModuleAssembler::emit_extract_bitstring(const a64::Gp bitdata, flush_var(dst); } -static std::vector opt_bsm_segments( - const std::vector segments, - const ArgWord &Need, - const ArgWord &Live) { - std::vector segs; - - Uint heap_need = Need.get(); - - /* - * First calculate the total number of heap words needed for - * bignums and binaries. - */ - for (auto seg : segments) { - switch (seg.action) { - case BsmSegment::action::GET_INTEGER: - if (seg.size >= SMALL_BITS) { - heap_need += BIG_NEED_FOR_BITS(seg.size); - } - break; - case BsmSegment::action::GET_BITSTRING: - heap_need += erts_extracted_bitstring_size(seg.size); - break; - case BsmSegment::action::GET_TAIL: - heap_need += BUILD_SUB_BITSTRING_HEAP_NEED; - break; - default: - break; - } - } - - int index = 0; - int read_action_pos = -1; - - index = 0; - for (auto seg : segments) { - if (heap_need != 0 && seg.live.isWord()) { - BsmSegment s = seg; - - read_action_pos = -1; - s.action = BsmSegment::action::TEST_HEAP; - s.size = heap_need; - segs.push_back(s); - index++; - heap_need = 0; - } - - switch (seg.action) { - case BsmSegment::action::GET_INTEGER: - case BsmSegment::action::GET_BITSTRING: - if (seg.size > 64) { - read_action_pos = -1; - } else { - if ((seg.flags & BSF_LITTLE) != 0 || read_action_pos < 0 || - seg.size + segs.at(read_action_pos).size > 64) { - BsmSegment s; - - /* Create a new READ action. */ - read_action_pos = index; - s.action = BsmSegment::action::READ; - s.size = seg.size; - segs.push_back(s); - index++; - } else { - /* Reuse previous READ action. */ - segs.at(read_action_pos).size += seg.size; - } - switch (seg.action) { - case BsmSegment::action::GET_INTEGER: - seg.action = BsmSegment::action::EXTRACT_INTEGER; - break; - case BsmSegment::action::GET_BITSTRING: - seg.action = BsmSegment::action::EXTRACT_BITSTRING; - break; - default: - break; - } - } - segs.push_back(seg); - break; - case BsmSegment::action::EQ: { - if (read_action_pos < 0 || - seg.size + segs.at(read_action_pos).size > 64) { - BsmSegment s; - - /* Create a new READ action. */ - read_action_pos = index; - s.action = BsmSegment::action::READ; - s.size = seg.size; - segs.push_back(s); - index++; - } else { - /* Reuse previous READ action. */ - segs.at(read_action_pos).size += seg.size; - } - auto &prev = segs.back(); - if (prev.action == BsmSegment::action::EQ && - prev.size + seg.size <= 64) { - /* Coalesce with the previous EQ instruction. */ - prev.size += seg.size; - prev.unit = prev.unit << seg.size | seg.unit; - index--; - } else { - segs.push_back(seg); - } - break; - } - case BsmSegment::action::SKIP: - if (read_action_pos >= 0 && - seg.size + segs.at(read_action_pos).size <= 64) { - segs.at(read_action_pos).size += seg.size; - seg.action = BsmSegment::action::DROP; - } else { - read_action_pos = -1; - } - segs.push_back(seg); - break; - default: - read_action_pos = -1; - segs.push_back(seg); - break; - } - index++; - } - - /* Handle a trailing test_heap instruction (for the - * i_bs_match_test_heap instruction). */ - if (heap_need) { - BsmSegment seg; - - seg.action = BsmSegment::action::TEST_HEAP; - seg.size = heap_need; - seg.live = Live; - segs.push_back(seg); - } - return segs; -} - -UWord BeamModuleAssembler::bs_get_flags(const ArgVal &val) { - if (val.isNil()) { - return 0; - } else if (val.isLiteral()) { - Eterm term = beamfile_get_literal(beam, val.as().get()); - UWord flags = 0; - - while (is_list(term)) { - Eterm *consp = list_val(term); - Eterm elem = CAR(consp); - switch (elem) { - case am_little: - case am_native: - flags |= BSF_LITTLE; - break; - case am_signed: - flags |= BSF_SIGNED; - break; - } - term = CDR(consp); - } - ASSERT(is_nil(term)); - return flags; - } else if (val.isWord()) { - /* Originates from bs_get_integer2 instruction. */ - return val.as().get(); - } else { - ASSERT(0); /* Should not happen. */ - return 0; - } -} - void BeamModuleAssembler::emit_i_bs_match(ArgLabel const &Fail, ArgRegister const &Ctx, Span const &List) { @@ -3300,79 +2949,8 @@ void BeamModuleAssembler::emit_i_bs_match_test_heap(ArgLabel const &Fail, std::vector segments; - auto current = List.begin(); - auto end = List.begin() + List.size(); - - while (current < end) { - auto cmd = current++->as().get(); - BsmSegment seg; - - switch (cmd) { - case am_ensure_at_least: { - seg.action = BsmSegment::action::ENSURE_AT_LEAST; - seg.size = current[0].as().get(); - seg.unit = current[1].as().get(); - current += 2; - break; - } - case am_ensure_exactly: { - seg.action = BsmSegment::action::ENSURE_EXACTLY; - seg.size = current[0].as().get(); - current += 1; - break; - } - case am_binary: - case am_integer: { - auto size = current[2].as().get(); - auto unit = current[3].as().get(); - - switch (cmd) { - case am_integer: - seg.action = BsmSegment::action::GET_INTEGER; - break; - case am_binary: - seg.action = BsmSegment::action::GET_BITSTRING; - break; - } - - seg.live = current[0]; - seg.size = size * unit; - seg.unit = unit; - seg.flags = bs_get_flags(current[1]); - seg.dst = current[4].as(); - current += 5; - break; - } - case am_get_tail: { - seg.action = BsmSegment::action::GET_TAIL; - seg.live = current[0].as(); - seg.dst = current[2].as(); - current += 3; - break; - } - case am_skip: { - seg.action = BsmSegment::action::SKIP; - seg.size = current[0].as().get(); - seg.flags = 0; - current += 1; - break; - } - case am_Eq: { - seg.action = BsmSegment::action::EQ; - seg.live = current[0]; - seg.size = current[1].as().get(); - seg.unit = current[2].as().get(); - current += 3; - break; - } - default: - abort(); - break; - } - segments.push_back(seg); - } - - segments = opt_bsm_segments(segments, Need, Live); + segments = beam_jit_bsm_init(beam, List); + segments = beam_jit_opt_bsm_segments(segments, Need, Live); const a64::Gp bin_base = ARG2; const a64::Gp bin_position = ARG3; @@ -3517,6 +3095,41 @@ void BeamModuleAssembler::emit_i_bs_match_test_heap(ArgLabel const &Fail, Dst); break; } + case BsmSegment::action::READ_INTEGER: { + auto bits = seg.size; + auto flags = seg.flags; + auto Dst = seg.dst; + + comment("read integer %ld", seg.size); + if (seg.size == 0) { + comment("(nothing to read)"); + } else { + auto ctx = load_source(Ctx, ARG1); + + if (!position_is_valid) { + a.ldur(bin_position, emit_boxed_val(ctx.reg, start_offset)); + position_is_valid = true; + } + + a.ldur(bin_base, emit_boxed_val(ctx.reg, base_offset)); + a.and_(bin_base, bin_base, imm(~ERL_SUB_BITS_FLAG_MASK)); + + emit_read_bits(seg.size, bin_base, bin_position, bitdata); + + a.add(bin_position, bin_position, imm(seg.size)); + a.stur(bin_position, emit_boxed_val(ctx.reg, start_offset)); + } + + offset_in_bitdata = 64 - bits; + mov_imm(small_tag, _TAG_IMMED1_SMALL); + emit_extract_integer(bitdata, + small_tag, + flags, + offset_in_bitdata, + bits, + Dst); + break; + } case BsmSegment::action::GET_INTEGER: { /* Match integer segments with more than 64 bits. */ Uint live = seg.live.as().get(); diff --git a/erts/emulator/beam/jit/beam_jit_bs.cpp b/erts/emulator/beam/jit/beam_jit_bs.cpp new file mode 100644 index 000000000000..470f2a6e8ec4 --- /dev/null +++ b/erts/emulator/beam/jit/beam_jit_bs.cpp @@ -0,0 +1,442 @@ +/* + * %CopyrightBegin% + * + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright Ericsson AB 2025. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#ifndef ASMJIT_ASMJIT_H_INCLUDED +# include +#endif + +#include "beam_asm.hpp" +#include "beam_jit_common.hpp" +#include "beam_jit_bs.hpp" + +extern "C" +{ +#include "beam_file.h" +}; + +std::vector beam_jit_bsc_init(const Span &args) { + std::size_t n = args.size(); + std::vector segments; + + /* + * Collect information about each segment and calculate sizes of + * fixed segments. + */ + for (std::size_t i = 0; i < n; i += 6) { + BscSegment seg; + JitBSCOp bsc_op; + Uint bsc_segment; + + seg.type = args[i].as().get(); + bsc_segment = args[i + 1].as().get(); + seg.unit = args[i + 2].as().get(); + seg.flags = args[i + 3].as().get(); + seg.src = args[i + 4]; + seg.size = args[i + 5]; + + switch (seg.type) { + case am_float: + bsc_op = BSC_OP_FLOAT; + break; + case am_integer: + bsc_op = BSC_OP_INTEGER; + break; + case am_utf8: + bsc_op = BSC_OP_UTF8; + break; + case am_utf16: + bsc_op = BSC_OP_UTF16; + break; + case am_utf32: + bsc_op = BSC_OP_UTF32; + break; + default: + bsc_op = BSC_OP_BITSTRING; + break; + } + + /* + * Save segment number and operation for use in extended + * error information. + */ + seg.error_info = beam_jit_set_bsc_segment_op(bsc_segment, bsc_op); + + segments.insert(segments.end(), seg); + } + + return segments; +} + +std::vector beam_jit_bsc_combine_segments( + const std::vector segments) { + std::vector segs; + + for (auto seg : segments) { + switch (seg.type) { + case am_integer: { + if (!(0 < seg.effectiveSize && seg.effectiveSize <= 64)) { + /* Unknown or too large size. Handle using the default + * DIRECT action. */ + segs.push_back(seg); + continue; + } + + if (seg.flags & BSF_LITTLE || segs.size() == 0 || + segs.back().action == BscSegment::action::DIRECT) { + /* There are no previous compatible ACCUMULATE / STORE + * actions. Create the first ones. */ + seg.action = BscSegment::action::ACCUMULATE_FIRST; + segs.push_back(seg); + seg.action = BscSegment::action::STORE; + segs.push_back(seg); + continue; + } + + auto prev = segs.back(); + if (prev.flags & BSF_LITTLE) { + /* Little-endian segments cannot be combined with other + * segments. Create new ACCUMULATE_FIRST / STORE actions. */ + seg.action = BscSegment::action::ACCUMULATE_FIRST; + segs.push_back(seg); + seg.action = BscSegment::action::STORE; + segs.push_back(seg); + continue; + } + + /* The current segment is compatible with the previous + * segment. Try combining them. */ + if (prev.effectiveSize + seg.effectiveSize <= 64) { + /* The combined values of the segments fit in the + * accumulator. Insert an ACCUMULATE action for the + * current segment before the pre-existing STORE + * action. */ + segs.pop_back(); + prev.effectiveSize += seg.effectiveSize; + seg.action = BscSegment::action::ACCUMULATE; + segs.push_back(seg); + segs.push_back(prev); + } else { + /* The size exceeds 64 bits. Can't combine. */ + seg.action = BscSegment::action::ACCUMULATE_FIRST; + segs.push_back(seg); + seg.action = BscSegment::action::STORE; + segs.push_back(seg); + } + break; + } + default: + segs.push_back(seg); + break; + } + } + + /* Calculate bit offsets for each ACCUMULATE segment. */ + + Uint offset = 0; + for (int i = segs.size() - 1; i >= 0; i--) { + switch (segs[i].action) { + case BscSegment::action::STORE: + offset = 64 - segs[i].effectiveSize; + break; + case BscSegment::action::ACCUMULATE_FIRST: + case BscSegment::action::ACCUMULATE: + segs[i].offsetInAccumulator = offset; + offset += segs[i].effectiveSize; + break; + default: + break; + } + } + + return segs; +} + +static UWord bs_get_flags(const BeamFile *beam, const ArgVal &val) { + if (val.isNil()) { + return 0; + } else if (val.isLiteral()) { + Eterm term = beamfile_get_literal(beam, val.as().get()); + UWord flags = 0; + + while (is_list(term)) { + Eterm *consp = list_val(term); + Eterm elem = CAR(consp); + switch (elem) { + case am_little: + case am_native: + flags |= BSF_LITTLE; + break; + case am_signed: + flags |= BSF_SIGNED; + break; + } + term = CDR(consp); + } + ASSERT(is_nil(term)); + return flags; + } else if (val.isWord()) { + /* Originates from bs_get_integer2 instruction. */ + return val.as().get(); + } else { + ASSERT(0); /* Should not happen. */ + return 0; + } +} + +std::vector beam_jit_bsm_init(const BeamFile *beam, + Span const &List) { + std::vector segments; + + auto current = List.begin(); + auto end = List.begin() + List.size(); + + while (current < end) { + auto cmd = current++->as().get(); + BsmSegment seg; + + switch (cmd) { + case am_ensure_at_least: { + seg.action = BsmSegment::action::ENSURE_AT_LEAST; + seg.size = current[0].as().get(); + seg.unit = current[1].as().get(); + current += 2; + break; + } + case am_ensure_exactly: { + seg.action = BsmSegment::action::ENSURE_EXACTLY; + seg.size = current[0].as().get(); + current += 1; + break; + } + case am_binary: + case am_integer: { + auto size = current[2].as().get(); + auto unit = current[3].as().get(); + + switch (cmd) { + case am_integer: + seg.action = BsmSegment::action::GET_INTEGER; + break; + case am_binary: + seg.action = BsmSegment::action::GET_BITSTRING; + break; + } + + seg.live = current[0]; + seg.size = size * unit; + seg.unit = unit; + seg.flags = bs_get_flags(beam, current[1]); + seg.dst = current[4].as(); + current += 5; + break; + } + case am_get_tail: { + seg.action = BsmSegment::action::GET_TAIL; + seg.live = current[0].as(); + seg.dst = current[2].as(); + current += 3; + break; + } + case am_skip: { + seg.action = BsmSegment::action::SKIP; + seg.size = current[0].as().get(); + seg.flags = 0; + current += 1; + break; + } + case am_Eq: { + seg.action = BsmSegment::action::EQ; + seg.live = current[0]; + seg.size = current[1].as().get(); + seg.unit = current[2].as().get(); + current += 3; + break; + } + default: + abort(); + break; + } + segments.push_back(seg); + } + + return segments; +} + +std::vector beam_jit_opt_bsm_segments( + const std::vector segments, + const ArgWord &Need, + const ArgWord &Live) { + std::vector segs; + + Uint heap_need = Need.get(); + + /* + * First calculate the total number of heap words needed for + * bignums and binaries. + */ + for (auto seg : segments) { + switch (seg.action) { + case BsmSegment::action::GET_INTEGER: + if (seg.size >= SMALL_BITS) { + heap_need += BIG_NEED_FOR_BITS(seg.size); + } + break; + case BsmSegment::action::GET_BITSTRING: + heap_need += erts_extracted_bitstring_size(seg.size); + break; + case BsmSegment::action::GET_TAIL: + heap_need += BUILD_SUB_BITSTRING_HEAP_NEED; + break; + default: + break; + } + } + + int read_action_pos = -1; + int seg_index = 0; + int count = segments.size(); + + for (int i = 0; i < count; i++) { + auto seg = segments[i]; + if (heap_need != 0 && seg.live.isWord()) { + BsmSegment s = seg; + + read_action_pos = -1; + s.action = BsmSegment::action::TEST_HEAP; + s.size = heap_need; + segs.push_back(s); + heap_need = 0; + seg_index++; + } + + switch (seg.action) { + case BsmSegment::action::GET_INTEGER: + case BsmSegment::action::GET_BITSTRING: { + bool is_common_size; + + switch (seg.size) { + case 8: + case 16: + case 32: + is_common_size = true; + break; + default: + is_common_size = false; + break; + } + + if (seg.size > 64) { + read_action_pos = -1; + } else if ((seg.flags & BSF_LITTLE) != 0 && is_common_size) { + seg.action = BsmSegment::action::READ_INTEGER; + read_action_pos = -1; + } else if (read_action_pos < 0 && + seg.action == BsmSegment::action::GET_INTEGER && + is_common_size && i + 1 == count) { + seg.action = BsmSegment::action::READ_INTEGER; + read_action_pos = -1; + } else { + if ((seg.flags & BSF_LITTLE) != 0 || read_action_pos < 0 || + seg.size + segs.at(read_action_pos).size > 64) { + BsmSegment s; + + /* Create a new READ action. */ + read_action_pos = seg_index; + s.action = BsmSegment::action::READ; + s.size = seg.size; + segs.push_back(s); + seg_index++; + } else { + /* Reuse previous READ action. */ + segs.at(read_action_pos).size += seg.size; + } + switch (seg.action) { + case BsmSegment::action::GET_INTEGER: + seg.action = BsmSegment::action::EXTRACT_INTEGER; + break; + case BsmSegment::action::GET_BITSTRING: + seg.action = BsmSegment::action::EXTRACT_BITSTRING; + break; + default: + break; + } + } + segs.push_back(seg); + break; + } + case BsmSegment::action::EQ: { + if (read_action_pos < 0 || + seg.size + segs.at(read_action_pos).size > 64) { + BsmSegment s; + + /* Create a new READ action. */ + read_action_pos = seg_index; + s.action = BsmSegment::action::READ; + s.size = seg.size; + segs.push_back(s); + seg_index++; + } else { + /* Reuse previous READ action. */ + segs.at(read_action_pos).size += seg.size; + } + auto &prev = segs.back(); + if (prev.action == BsmSegment::action::EQ && + prev.size + seg.size <= 64) { + /* Coalesce with the previous EQ instruction. */ + prev.size += seg.size; + prev.unit = prev.unit << seg.size | seg.unit; + seg_index--; + } else { + segs.push_back(seg); + } + break; + } + case BsmSegment::action::SKIP: + if (read_action_pos >= 0 && + seg.size + segs.at(read_action_pos).size <= 64) { + segs.at(read_action_pos).size += seg.size; + seg.action = BsmSegment::action::DROP; + } else { + read_action_pos = -1; + } + segs.push_back(seg); + break; + default: + read_action_pos = -1; + segs.push_back(seg); + break; + } + seg_index++; + } + + /* Handle a trailing test_heap instruction (for the + * i_bs_match_test_heap instruction). */ + if (heap_need) { + BsmSegment seg; + + seg.action = BsmSegment::action::TEST_HEAP; + seg.size = heap_need; + seg.live = Live; + segs.push_back(seg); + } + return segs; +} diff --git a/erts/emulator/beam/jit/beam_jit_bs.hpp b/erts/emulator/beam/jit/beam_jit_bs.hpp new file mode 100644 index 000000000000..5bee9a0d25e9 --- /dev/null +++ b/erts/emulator/beam/jit/beam_jit_bs.hpp @@ -0,0 +1,107 @@ +/* + * %CopyrightBegin% + * + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright Ericsson AB 2025. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#ifndef __BEAM_JIT_BS_HPP__ +#define __BEAM_JIT_BS_HPP__ + +#include "beam_jit_common.hpp" + +/* + * Constructing binaries. + */ + +struct BscSegment { + BscSegment() + : type(am_false), unit(1), flags(0), src(ArgNil()), size(ArgNil()), + error_info(0), offsetInAccumulator(0), effectiveSize(-1), + action(action::DIRECT) { + } + + Eterm type; + Uint unit; + Uint flags; + ArgVal src; + ArgVal size; + + Uint error_info; + Uint offsetInAccumulator; + Sint effectiveSize; + + /* Here are sub actions for storing integer segments. + * + * We use the ACCUMULATE_FIRST and ACCUMULATE actions to shift the + * values of segments with known, small sizes (no more than 64 bits) + * into an accumulator register. + * + * When no more segments can be accumulated, the STORE action is + * used to store the value of the accumulator into the binary. + * + * The DIRECT action is used when it is not possible to use the + * accumulator (for unknown or too large sizes). + */ + enum class action { DIRECT, ACCUMULATE_FIRST, ACCUMULATE, STORE } action; +}; + +std::vector beam_jit_bsc_init(const Span &args); + +std::vector beam_jit_bsc_combine_segments( + const std::vector segments); + +/* + * Matching binaries. + */ + +struct BsmSegment { + BsmSegment() + : action(action::TEST_HEAP), live(ArgNil()), size(0), unit(1), + flags(0), dst(ArgXRegister(0)){}; + + enum class action { + TEST_HEAP, + ENSURE_AT_LEAST, + ENSURE_EXACTLY, + READ, + EXTRACT_BITSTRING, + EXTRACT_INTEGER, + READ_INTEGER, + GET_INTEGER, + GET_BITSTRING, + SKIP, + DROP, + GET_TAIL, + EQ + } action; + ArgVal live; + Uint size; + Uint unit; + Uint flags; + ArgRegister dst; +}; + +std::vector beam_jit_bsm_init(const BeamFile *beam, + Span const &List); + +std::vector beam_jit_opt_bsm_segments( + const std::vector segments, + const ArgWord &Need, + const ArgWord &Live); +#endif diff --git a/erts/emulator/beam/jit/x86/instr_bs.cpp b/erts/emulator/beam/jit/x86/instr_bs.cpp index 13ecf9a4c297..14ea0655367d 100644 --- a/erts/emulator/beam/jit/x86/instr_bs.cpp +++ b/erts/emulator/beam/jit/x86/instr_bs.cpp @@ -21,6 +21,7 @@ */ #include "beam_asm.hpp" +#include "beam_jit_bs.hpp" #include extern "C" @@ -1175,101 +1176,6 @@ void BeamGlobalAssembler::emit_get_sint64_shared() { } } -struct BscSegment { - BscSegment() - : type(am_false), unit(1), flags(0), src(ArgNil()), size(ArgNil()), - error_info(0), effectiveSize(-1), action(action::DIRECT) { - } - - Eterm type; - Uint unit; - Uint flags; - ArgVal src; - ArgVal size; - - Uint error_info; - Sint effectiveSize; - - /* Here are sub actions for storing integer segments. - * - * We use the ACCUMULATE_FIRST and ACCUMULATE actions to shift the - * values of segments with known, small sizes (no more than 64 bits) - * into an accumulator register. - * - * When no more segments can be accumulated, the STORE action is - * used to store the value of the accumulator into the binary. - * - * The DIRECT action is used when it is not possible to use the - * accumulator (for unknown or too large sizes). - */ - enum class action { DIRECT, ACCUMULATE_FIRST, ACCUMULATE, STORE } action; -}; - -static std::vector bs_combine_segments( - const std::vector segments) { - std::vector segs; - - for (auto seg : segments) { - switch (seg.type) { - case am_integer: { - if (!(0 < seg.effectiveSize && seg.effectiveSize <= 64)) { - /* Unknown or too large size. Handle using the default - * DIRECT action. */ - segs.push_back(seg); - continue; - } - - if (seg.flags & BSF_LITTLE || segs.size() == 0 || - segs.back().action == BscSegment::action::DIRECT) { - /* There are no previous compatible ACCUMULATE / STORE - * actions. Create the first ones. */ - seg.action = BscSegment::action::ACCUMULATE_FIRST; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - continue; - } - - auto prev = segs.back(); - if (prev.flags & BSF_LITTLE) { - /* Little-endian segments cannot be combined with other - * segments. Create new ACCUMULATE_FIRST / STORE actions. */ - seg.action = BscSegment::action::ACCUMULATE_FIRST; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - continue; - } - - /* The current segment is compatible with the previous - * segment. Try combining them. */ - if (prev.effectiveSize + seg.effectiveSize <= 64) { - /* The combined values of the segments fits in the - * accumulator. Insert an ACCUMULATE action for the - * current segment before the pre-existing STORE - * action. */ - segs.pop_back(); - prev.effectiveSize += seg.effectiveSize; - seg.action = BscSegment::action::ACCUMULATE; - segs.push_back(seg); - segs.push_back(prev); - } else { - /* The size exceeds 64 bits. Can't combine. */ - seg.action = BscSegment::action::ACCUMULATE_FIRST; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - } - break; - } - default: - segs.push_back(seg); - break; - } - } - return segs; -} - /* * In: * bin_offset = if valid, register to store the lower 32 bits @@ -1759,7 +1665,6 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, const ArgRegister &Dst, const Span &args) { Uint num_bits = 0; - std::size_t n = args.size(); std::vector segments; Label error; /* Intentionally uninitialized */ ArgWord Live = Live0; @@ -1768,49 +1673,9 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, bool need_error_handler = false; bool runtime_entered = false; - /* - * Collect information about each segment and calculate sizes of - * fixed segments. - */ - for (std::size_t i = 0; i < n; i += 6) { - BscSegment seg; - JitBSCOp bsc_op; - Uint bsc_segment; - - seg.type = args[i].as().get(); - bsc_segment = args[i + 1].as().get(); - seg.unit = args[i + 2].as().get(); - seg.flags = args[i + 3].as().get(); - seg.src = args[i + 4]; - seg.size = args[i + 5]; - - switch (seg.type) { - case am_float: - bsc_op = BSC_OP_FLOAT; - break; - case am_integer: - bsc_op = BSC_OP_INTEGER; - break; - case am_utf8: - bsc_op = BSC_OP_UTF8; - break; - case am_utf16: - bsc_op = BSC_OP_UTF16; - break; - case am_utf32: - bsc_op = BSC_OP_UTF32; - break; - default: - bsc_op = BSC_OP_BITSTRING; - break; - } - - /* - * Save segment number and operation for use in extended - * error information. - */ - seg.error_info = beam_jit_set_bsc_segment_op(bsc_segment, bsc_op); + segments = beam_jit_bsc_init(args); + for (auto &seg : segments) { /* * Test whether we can omit the code for the error handler. */ @@ -1888,8 +1753,6 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, sizeReg = active_code_ix; need_error_handler = true; } - - segments.insert(segments.end(), seg); } /* @@ -2211,7 +2074,7 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, } } - segments = bs_combine_segments(segments); + segments = beam_jit_bsc_combine_segments(segments); /* Allocate the binary. */ if (segments[0].type == am_append) { @@ -2830,33 +2693,6 @@ void BeamModuleAssembler::emit_i_bs_create_bin(const ArgLabel &Fail, * Here follows the bs_match instruction and friends. */ -struct BsmSegment { - BsmSegment() - : action(action::TEST_HEAP), live(ArgNil()), size(0), unit(1), - flags(0), dst(ArgXRegister(0)){}; - - enum class action { - TEST_HEAP, - ENSURE_AT_LEAST, - ENSURE_EXACTLY, - READ, - EXTRACT_BITSTRING, - EXTRACT_INTEGER, - READ_INTEGER, - GET_INTEGER, - GET_BITSTRING, - SKIP, - DROP, - GET_TAIL, - EQ - } action; - ArgVal live; - Uint size; - Uint unit; - Uint flags; - ArgRegister dst; -}; - void BeamModuleAssembler::emit_read_bits(Uint bits, const x86::Gp bin_base, const x86::Gp bin_offset, @@ -3292,197 +3128,6 @@ void BeamModuleAssembler::emit_extract_bitstring(const x86::Gp bitdata, } } -static std::vector opt_bsm_segments( - const std::vector segments, - const ArgWord &Need, - const ArgWord &Live) { - std::vector segs; - - Uint heap_need = Need.get(); - - /* - * First calculate the total number of heap words needed for - * bignums and binaries. - */ - for (auto seg : segments) { - switch (seg.action) { - case BsmSegment::action::GET_INTEGER: - if (seg.size >= SMALL_BITS) { - heap_need += BIG_NEED_FOR_BITS(seg.size); - } - break; - case BsmSegment::action::GET_BITSTRING: - heap_need += erts_extracted_bitstring_size(seg.size); - break; - case BsmSegment::action::GET_TAIL: - heap_need += BUILD_SUB_BITSTRING_HEAP_NEED; - break; - default: - break; - } - } - - int read_action_pos = -1; - int seg_index = 0; - int count = segments.size(); - - for (int i = 0; i < count; i++) { - auto seg = segments[i]; - if (heap_need != 0 && seg.live.isWord()) { - BsmSegment s = seg; - - read_action_pos = -1; - s.action = BsmSegment::action::TEST_HEAP; - s.size = heap_need; - segs.push_back(s); - heap_need = 0; - seg_index++; - } - - switch (seg.action) { - case BsmSegment::action::GET_INTEGER: - case BsmSegment::action::GET_BITSTRING: { - bool is_common_size; - switch (seg.size) { - case 8: - case 16: - case 32: - is_common_size = true; - break; - default: - is_common_size = false; - break; - } - - if (seg.size > 64) { - read_action_pos = -1; - } else if ((seg.flags & BSF_LITTLE) != 0 && is_common_size) { - seg.action = BsmSegment::action::READ_INTEGER; - read_action_pos = -1; - } else if (read_action_pos < 0 && - seg.action == BsmSegment::action::GET_INTEGER && - is_common_size && i + 1 == count) { - seg.action = BsmSegment::action::READ_INTEGER; - read_action_pos = -1; - } else { - if ((seg.flags & BSF_LITTLE) != 0 || read_action_pos < 0 || - seg.size + segs.at(read_action_pos).size > 64) { - BsmSegment s; - - /* Create a new READ action. */ - read_action_pos = seg_index; - s.action = BsmSegment::action::READ; - s.size = seg.size; - segs.push_back(s); - seg_index++; - } else { - /* Reuse previous READ action. */ - segs.at(read_action_pos).size += seg.size; - } - switch (seg.action) { - case BsmSegment::action::GET_INTEGER: - seg.action = BsmSegment::action::EXTRACT_INTEGER; - break; - case BsmSegment::action::GET_BITSTRING: - seg.action = BsmSegment::action::EXTRACT_BITSTRING; - break; - default: - break; - } - } - segs.push_back(seg); - break; - } - case BsmSegment::action::EQ: { - if (read_action_pos < 0 || - seg.size + segs.at(read_action_pos).size > 64) { - BsmSegment s; - - /* Create a new READ action. */ - read_action_pos = seg_index; - s.action = BsmSegment::action::READ; - s.size = seg.size; - segs.push_back(s); - seg_index++; - } else { - /* Reuse previous READ action. */ - segs.at(read_action_pos).size += seg.size; - } - auto &prev = segs.back(); - if (prev.action == BsmSegment::action::EQ && - prev.size + seg.size <= 64) { - /* Coalesce with the previous EQ instruction. */ - prev.size += seg.size; - prev.unit = prev.unit << seg.size | seg.unit; - seg_index--; - } else { - segs.push_back(seg); - } - break; - } - case BsmSegment::action::SKIP: - if (read_action_pos >= 0 && - seg.size + segs.at(read_action_pos).size <= 64) { - segs.at(read_action_pos).size += seg.size; - seg.action = BsmSegment::action::DROP; - } else { - read_action_pos = -1; - } - segs.push_back(seg); - break; - default: - read_action_pos = -1; - segs.push_back(seg); - break; - } - seg_index++; - } - - /* Handle a trailing test_heap instruction (for the - * i_bs_match_test_heap instruction). */ - if (heap_need) { - BsmSegment seg; - - seg.action = BsmSegment::action::TEST_HEAP; - seg.size = heap_need; - seg.live = Live; - segs.push_back(seg); - } - return segs; -} - -UWord BeamModuleAssembler::bs_get_flags(const ArgVal &val) { - if (val.isNil()) { - return 0; - } else if (val.isLiteral()) { - Eterm term = beamfile_get_literal(beam, val.as().get()); - UWord flags = 0; - - while (is_list(term)) { - Eterm *consp = list_val(term); - Eterm elem = CAR(consp); - switch (elem) { - case am_little: - case am_native: - flags |= BSF_LITTLE; - break; - case am_signed: - flags |= BSF_SIGNED; - break; - } - term = CDR(consp); - } - ASSERT(is_nil(term)); - return flags; - } else if (val.isWord()) { - /* Originates from bs_get_integer2 instruction. */ - return val.as().get(); - } else { - ASSERT(0); /* Should not happen. */ - return 0; - } -} - void BeamModuleAssembler::emit_i_bs_match(ArgLabel const &Fail, ArgRegister const &Ctx, Span const &List) { @@ -3501,79 +3146,8 @@ void BeamModuleAssembler::emit_i_bs_match_test_heap(ArgLabel const &Fail, std::vector segments; - auto current = List.begin(); - auto end = List.begin() + List.size(); - - while (current < end) { - auto cmd = current++->as().get(); - BsmSegment seg; - - switch (cmd) { - case am_ensure_at_least: { - seg.action = BsmSegment::action::ENSURE_AT_LEAST; - seg.size = current[0].as().get(); - seg.unit = current[1].as().get(); - current += 2; - break; - } - case am_ensure_exactly: { - seg.action = BsmSegment::action::ENSURE_EXACTLY; - seg.size = current[0].as().get(); - current += 1; - break; - } - case am_binary: - case am_integer: { - auto size = current[2].as().get(); - auto unit = current[3].as().get(); - - switch (cmd) { - case am_integer: - seg.action = BsmSegment::action::GET_INTEGER; - break; - case am_binary: - seg.action = BsmSegment::action::GET_BITSTRING; - break; - } - - seg.live = current[0]; - seg.size = size * unit; - seg.unit = unit; - seg.flags = bs_get_flags(current[1]); - seg.dst = current[4].as(); - current += 5; - break; - } - case am_get_tail: { - seg.action = BsmSegment::action::GET_TAIL; - seg.live = current[0].as(); - seg.dst = current[2].as(); - current += 3; - break; - } - case am_skip: { - seg.action = BsmSegment::action::SKIP; - seg.size = current[0].as().get(); - seg.flags = 0; - current += 1; - break; - } - case am_Eq: { - seg.action = BsmSegment::action::EQ; - seg.live = current[0]; - seg.size = current[1].as().get(); - seg.unit = current[2].as().get(); - current += 3; - break; - } - default: - abort(); - break; - } - segments.push_back(seg); - } - - segments = opt_bsm_segments(segments, Need, Live); + segments = beam_jit_bsm_init(beam, List); + segments = beam_jit_opt_bsm_segments(segments, Need, Live); /* Constraints: * From 531e542d1e0da012870343cd13d91a7580ddde5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 6 Aug 2025 10:35:48 +0200 Subject: [PATCH 2/3] Optimize construction of little-endian segments In Erlang/OTP 26 (in #6031), the JIT learned to optimize binary construction such as: <> The optimization is done on the native-code level, but the idea behind it can be illustrated in Erlang by rewriting the construction as follows: Acc0 = A, Acc1 = (Acc0 bsl 32) bor B, Acc = (Acc1 bsl 16) bor C, <> When done in native code, the values of the segments is accumulated into a CPU register, which is then written to memory. This is faster than writing each segment to memory one at a time, especially if the sizes are not byte-sized as in the following example: <> This commit introduces a similar optimization for little-endian integer segments. Example: <> This expression can be rewritten as follows: Acc0 = C, Acc1 = (Acc0 bsl 32) bor B, Acc = (Acc1 bsl 16) bor A, <> Note that this rewriting is only safe if all segments except the last one are byte-sized. --- erts/emulator/beam/jit/beam_jit_bs.cpp | 143 ++++++++++++++++------ erts/emulator/test/bs_construct_SUITE.erl | 33 ++++- 2 files changed, 140 insertions(+), 36 deletions(-) diff --git a/erts/emulator/beam/jit/beam_jit_bs.cpp b/erts/emulator/beam/jit/beam_jit_bs.cpp index 470f2a6e8ec4..20cafae47deb 100644 --- a/erts/emulator/beam/jit/beam_jit_bs.cpp +++ b/erts/emulator/beam/jit/beam_jit_bs.cpp @@ -86,9 +86,80 @@ std::vector beam_jit_bsc_init(const Span &args) { return segments; } +static void push_group(std::vector &segs, + std::vector &group) { + BscSegment seg; + + if (group.empty()) { + return; + } + + if ((group.back().flags & BSF_LITTLE) != 0) { + std::reverse(group.begin(), group.end()); + } + + Sint groupSize = 0; + for (size_t i = 0; i < group.size(); i++) { + seg = group[i]; + groupSize += seg.effectiveSize; + if (i == 0) { + seg.action = BscSegment::action::ACCUMULATE_FIRST; + } else { + seg.action = BscSegment::action::ACCUMULATE; + } + segs.push_back(seg); + } + + seg.type = am_integer; + seg.action = BscSegment::action::STORE; + seg.effectiveSize = groupSize; + segs.push_back(seg); + + group.clear(); +} + +/* + * Combine small segments into a group so that the values for the + * segments can be combined into an accumulator register and then + * written to memory. Here is an example in Erlang illustrating the + * idea. Consider this binary construction example: + * + * <> + * + * This can be rewritten as follows: + * + * Acc0 = A, + * Acc1 = (Acc0 bsl 32) bor B, + * Acc = (Acc1 bsl 16) bor C, + * <> + * + * Translated to native code, this is faster because the accumulating + * is done in a CPU register, and then the result is written to memory. + * For big-endian segments, this rewrite works even if sizes are not + * byte-sized. For example: + * + * <> + * + * Little-endian segments can be optimized in a similar way. Consider: + * + * <> + * + * This can be rewritten like so: + * + * Acc0 = C, + * Acc1 = (Acc0 bsl 32) bor B, + * Acc = (Acc1 bsl 16) bor A, + * <> + * + * However, for little-endian segments, this rewriting will only work + * if all segment sizes but the last one are byte-sized. + */ + std::vector beam_jit_bsc_combine_segments( const std::vector segments) { std::vector segs; + std::vector group; + Sint combinedSize = 0; for (auto seg : segments) { switch (seg.type) { @@ -96,59 +167,61 @@ std::vector beam_jit_bsc_combine_segments( if (!(0 < seg.effectiveSize && seg.effectiveSize <= 64)) { /* Unknown or too large size. Handle using the default * DIRECT action. */ + push_group(segs, group); segs.push_back(seg); continue; } - if (seg.flags & BSF_LITTLE || segs.size() == 0 || - segs.back().action == BscSegment::action::DIRECT) { - /* There are no previous compatible ACCUMULATE / STORE - * actions. Create the first ones. */ - seg.action = BscSegment::action::ACCUMULATE_FIRST; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); + /* The current segment has a known size not exceeding 64 + * bits. Try to add it to the current group. */ + + if (group.empty()) { + group.push_back(seg); + combinedSize = seg.effectiveSize; continue; } - auto prev = segs.back(); - if (prev.flags & BSF_LITTLE) { - /* Little-endian segments cannot be combined with other - * segments. Create new ACCUMULATE_FIRST / STORE actions. */ - seg.action = BscSegment::action::ACCUMULATE_FIRST; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); + /* There is already at least one segment in the group. + * Append the current segment to the group only if it is + * compatible and will fit. */ + + auto &prev = group.back(); + bool sameEndian = + (seg.flags & BSF_LITTLE) == (prev.flags & BSF_LITTLE); + + /* Big-endian segments can always be grouped (if the size + * does not exceed 64 bits). Little-endian segments can + * only be grouped if all but the last segment are + * byte-sized. */ + bool suitableSizes = + ((seg.flags & BSF_LITTLE) == 0 || combinedSize % 8 == 0); + + if (sameEndian && combinedSize + seg.effectiveSize <= 64 && + suitableSizes) { + combinedSize += seg.effectiveSize; + group.push_back(seg); continue; } - /* The current segment is compatible with the previous - * segment. Try combining them. */ - if (prev.effectiveSize + seg.effectiveSize <= 64) { - /* The combined values of the segments fit in the - * accumulator. Insert an ACCUMULATE action for the - * current segment before the pre-existing STORE - * action. */ - segs.pop_back(); - prev.effectiveSize += seg.effectiveSize; - seg.action = BscSegment::action::ACCUMULATE; - segs.push_back(seg); - segs.push_back(prev); - } else { - /* The size exceeds 64 bits. Can't combine. */ - seg.action = BscSegment::action::ACCUMULATE_FIRST; - segs.push_back(seg); - seg.action = BscSegment::action::STORE; - segs.push_back(seg); - } + /* + * Not possible to fit anything more into the group. + * Flush the group and start a new group. + */ + push_group(segs, group); + + group.push_back(seg); + combinedSize = seg.effectiveSize; break; } default: + push_group(segs, group); segs.push_back(seg); break; } } + push_group(segs, group); + /* Calculate bit offsets for each ACCUMULATE segment. */ Uint offset = 0; diff --git a/erts/emulator/test/bs_construct_SUITE.erl b/erts/emulator/test/bs_construct_SUITE.erl index aceb28b800b8..7339b82e0272 100644 --- a/erts/emulator/test/bs_construct_SUITE.erl +++ b/erts/emulator/test/bs_construct_SUITE.erl @@ -212,7 +212,27 @@ l(I_13, I_big1) -> %% Test non-byte sizes and also that the value does not bleed %% into the previous segment. ?T(<<17, I_big1:33>>, <<17, 197,49,128,73,1:1>>), - ?T(<<19, I_big1:39>>, <<19, 11,20,198,1,19:7>>) + ?T(<<19, I_big1:39>>, <<19, 11,20,198,1,19:7>>), + + %% Test multiple little-endian segments. + ?T(<>, + [147,0,13,0,0]), + ?T(<>, + [147,5,147,0]), + ?T(<>, + [147,0,13,0,0,147,0,99,138,5,229,249,42,184,98]), + ?T(<>, + [147,0,99,138,5,229,49,197]), + ?T(<>, + [147,0,24,83,198,20,20,11]), + ?T(<>, + [147,0,99,96,76,152,98,98,65,121,190]), + ?T(<<0:5,I_big1:16/little, I_13:3/little>>, + [4,152,5]), + ?T(<<0:5,I_big1:16/little, (I_big1 bsr 15):19/little>>, + [4,152,6,48,163]) ]. native_3798() -> @@ -842,6 +862,7 @@ dynamic_little(Bef, N, Int, Lpad, Rpad) -> Bin = <>, if + %% Test unusual units. Bef rem 8 =:= 0 -> Bin = <>; @@ -851,6 +872,16 @@ dynamic_little(Bef, N, Int, Lpad, Rpad) -> (128-Bef-N) rem 17 =:= 0 -> Aft = (128 - Bef - N) div 17, Bin = <>; + + %% Test combinations of little-integer segments of fixed size. + Bef =:= 33, N =:= 45 -> + Bin = <>; + Bef =:= 16, N =:= 40 -> + Bin = <>; + Bef =:= 16, N =:= 48 -> + Bin = <>; + Bef =:= 65, N =:= 32 -> + Bin = <>; true -> ok end, From 24e5b6cbf7c9d1ce781d2f857f5c7215c59d4a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 8 Aug 2025 07:56:43 +0200 Subject: [PATCH 3/3] Optimize matching of little-endian segments Multiple little-endians segments were handled one at a time. It turns out that it is safe to match out multiple little-endian segments at once, or even any mix of big and little endian segments at once. --- erts/emulator/beam/jit/beam_jit_bs.cpp | 5 +- erts/emulator/test/bs_match_int_SUITE.erl | 97 +++++++++++++++++++++-- 2 files changed, 93 insertions(+), 9 deletions(-) diff --git a/erts/emulator/beam/jit/beam_jit_bs.cpp b/erts/emulator/beam/jit/beam_jit_bs.cpp index 20cafae47deb..d8549a5c926d 100644 --- a/erts/emulator/beam/jit/beam_jit_bs.cpp +++ b/erts/emulator/beam/jit/beam_jit_bs.cpp @@ -419,16 +419,13 @@ std::vector beam_jit_opt_bsm_segments( if (seg.size > 64) { read_action_pos = -1; - } else if ((seg.flags & BSF_LITTLE) != 0 && is_common_size) { - seg.action = BsmSegment::action::READ_INTEGER; - read_action_pos = -1; } else if (read_action_pos < 0 && seg.action == BsmSegment::action::GET_INTEGER && is_common_size && i + 1 == count) { seg.action = BsmSegment::action::READ_INTEGER; read_action_pos = -1; } else { - if ((seg.flags & BSF_LITTLE) != 0 || read_action_pos < 0 || + if (read_action_pos < 0 || seg.size + segs.at(read_action_pos).size > 64) { BsmSegment s; diff --git a/erts/emulator/test/bs_match_int_SUITE.erl b/erts/emulator/test/bs_match_int_SUITE.erl index 646d03c2391a..97b82f6c5108 100644 --- a/erts/emulator/test/bs_match_int_SUITE.erl +++ b/erts/emulator/test/bs_match_int_SUITE.erl @@ -601,7 +601,10 @@ cmp128(<>, I) -> equal; cmp128(_, _) -> not_equal. mixed_sizes(_Config) -> - mixed({345,42}, + _ = rand:uniform(), %Seed generator + io:format("Seed: ~p", [rand:export_seed()]), + + deterministic_mixed({345,42}, fun({A,B}) -> <>; (<>) -> @@ -614,13 +617,37 @@ mixed_sizes(_Config) -> (<>) -> {A,B,C,D,E} end), + mixed({27033,59991,16#c001cafe,12345,2}, + fun({A,B,C,D,E}) -> + <>; + (<>) -> + {A,B,C,D,E} + end), - mixed({79,153,17555,50_000,777_000,36#hugebignumber,2222}, + deterministic_mixed({79,153,17555,50_000,777_000,36#hugebignumber,2222}, fun({A,B,C,D,E,F,G}) -> <>; (<>) -> {A,B,C,D,E,F,G} end), + deterministic_mixed({79,153,17555,50_000,777_000,36#hugebignumber,2222}, + fun({A,B,C,D,E,F,G}) -> + <>; + (<>) -> + {A,B,C,D,E,F,G} + end), + deterministic_mixed({79,153,17555,50_000,777_000,36#hugebignumber,2222}, + fun({A,B,C,D,E,F,G}) -> + <>; + (<>) -> + {A,B,C,D,E,F,G} + end), mixed({16#123456789ABCDEF,13,36#hugenum,979}, fun({A,B,C,D}) -> @@ -628,7 +655,6 @@ mixed_sizes(_Config) -> (<>) -> {A,B,C,D} end), - mixed({16#123456789ABCDEF,13,36#hugenum,979}, fun({A,B,C,D}) -> <>; @@ -657,12 +683,20 @@ mixed_sizes(_Config) -> {A,B,C} end), - mixed({5,9,38759385,93}, + deterministic_mixed({5,9,38759385,93}, fun({A,B,C,D}) -> <<1:3,A:4,B:5,C:47,D:7>>; (<<1:3,A:4,B:5,C:47,D:7>>) -> {A,B,C,D} end), + deterministic_mixed({5,9,38759385,93}, + fun({A,B,C,D}) -> + <<1:3/little,A:4/little,B:5/little, + C:47/little,D:7/little>>; + (<<1:3/little,A:4/little,B:5/little, + C:47/little,D:7/little>>) -> + {A,B,C,D} + end), mixed({2022,8,22}, fun({A,B,C}) -> @@ -678,12 +712,62 @@ mixed_sizes(_Config) -> _ = id(0), {A,B,C} end), + + %% Additional roundtrip testing without facit. + + mixed(fun({A,B,C,D}) -> + <>; + (<>) -> + {A,B,C,D} + end), + + mixed(fun({A,B,C,D,E}) -> + <>; + (<>) -> + {A,B,C,D,E} + end), + + mixed(fun({A,B,C,D,E,F,G,H}) -> + <>; + (<>) -> + {A,B,C,D,E,F,G,H} + end), + ok. +mixed(F) when is_function(F, 1) -> + NumBits = mixed_bit_size(F, 1), + rand_mixed(NumBits, F). + +mixed_bit_size(F, N) when N < 1000 -> + try F(erlang:make_tuple(N, 0)) of + Bits when is_bitstring(Bits) -> + bit_size(Bits) + catch + _:_ -> + mixed_bit_size(F, N + 1) + end. + mixed(Data, F) -> + deterministic_mixed(Data, F), + rand_mixed(bit_size(F(Data)), F). + +deterministic_mixed(Data, F) -> Bin = F(Data), Data = F(Bin), - true = is_bitstring(Bin). + true = is_bitstring(Bin), + UnalignedBin = make_unaligned_sub_binary(Bin), + Data = F(UnalignedBin), + true = is_bitstring(UnalignedBin). + +rand_mixed(NumBits, F) -> + <> = rand:bytes((NumBits + 7) div 8), + Data = F(Bin), + Bin = F(Data), + UnalignedBin = make_unaligned_sub_binary(Bin), + Data = F(UnalignedBin). signed_integer(Config) when is_list(Config) -> {no_match,_} = sint(mkbin([])), @@ -999,4 +1083,7 @@ unit(_Config) -> %%% Common utilities. %%% +make_unaligned_sub_binary(Bin) -> + erts_debug:unaligned_bitstring(Bin, 3). + id(I) -> I.