From e7bf3c20c73cbd1a773da34eaa7d1d06faec25f6 Mon Sep 17 00:00:00 2001 From: fruno Date: Fri, 7 Nov 2025 14:02:16 +0100 Subject: [PATCH 1/4] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Optimize=20bit=20array?= =?UTF-8?q?=20int=20segment=20fit=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- compiler-core/src/exhaustiveness.rs | 81 +++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/compiler-core/src/exhaustiveness.rs b/compiler-core/src/exhaustiveness.rs index 007b42ac0db..5f2ddc69d71 100644 --- a/compiler-core/src/exhaustiveness.rs +++ b/compiler-core/src/exhaustiveness.rs @@ -86,7 +86,7 @@ use bitvec::{order::Msb0, slice::BitSlice, view::BitView}; use ecow::EcoString; use id_arena::{Arena, Id}; use itertools::Itertools; -use num_bigint::BigInt; +use num_bigint::{BigInt, Sign}; use num_traits::ToPrimitive; use radix_trie::{Trie, TrieCommon}; use std::{ @@ -1270,7 +1270,7 @@ impl BitArrayMatchedValue { BitArrayMatchedValue::Assign { value, .. } => value.is_impossible_segment(read_action), BitArrayMatchedValue::LiteralInt { value, location } => { let size = read_action.size.constant_bits()?.to_u32()?; - if representable_with_bits(value.clone(), size, read_action.signed) { + if representable_with_bits(value, size, read_action.signed) { None } else { Some(ImpossibleBitArraySegmentPattern::UnrepresentableInteger { @@ -3674,18 +3674,35 @@ fn superset( } #[must_use] -fn representable_with_bits(value: BigInt, bits: u32, signed: bool) -> bool { +fn representable_with_bits(value: &BigInt, bits: u32, signed: bool) -> bool { // No number is representable in 0 bits. if bits == 0 { return false; }; - if signed { - // Signed numbers range in [-2^(bits-1), 2^(bits-1)[ - let power = BigInt::from(2).pow(bits - 1); - -&power <= value && value < power + let is_negative = value.sign() == Sign::Minus; + if is_negative && !signed { + return false; + } + + let bits_unsigned = value.bits(); + let required_bits = if !signed { + bits_unsigned + } else if !is_negative { + // The minimal representation of an unsigned number always has 1 as its most + // significant bit and thus always needs an extra bit for the sign. + bits_unsigned + 1 + } else if let Some(trailing_zeros) = value.trailing_zeros() { + // Negative powers of two don't need an extra sign bit. E.g. `-2 == 0b10` + let is_power_of_2 = trailing_zeros == bits_unsigned - 1; + bits_unsigned + (!is_power_of_2 as u64) } else { - // Unsigned numbers range in [0, 2^bits[ - BigInt::from(0) <= value && value < BigInt::from(2).pow(bits) + // The value is 0 + 1 + }; + + match required_bits.to_u32() { + Some(r) => bits >= r, + None => false, } } @@ -3697,32 +3714,50 @@ mod representable_with_bits_test { #[test] fn positive_number_representable_with_bits_test() { // 9 can be represented as a >=4 bits unsigned number. - assert!(representable_with_bits(9.into(), 4, false)); - assert!(representable_with_bits(9.into(), 5, false)); + assert!(representable_with_bits(&9.into(), 4, false)); + assert!(representable_with_bits(&9.into(), 5, false)); // But not in <=3 bits as an unsigned number. - assert!(!representable_with_bits(9.into(), 3, false)); - assert!(!representable_with_bits(9.into(), 2, false)); + assert!(!representable_with_bits(&9.into(), 3, false)); + assert!(!representable_with_bits(&9.into(), 2, false)); // It can be represented as a >=5 bit signed number. - assert!(representable_with_bits(9.into(), 5, true)); - assert!(representable_with_bits(9.into(), 6, true)); + assert!(representable_with_bits(&9.into(), 5, true)); + assert!(representable_with_bits(&9.into(), 6, true)); // But not in <= 4 bits as a signed number. - assert!(!representable_with_bits(9.into(), 4, true)); - assert!(!representable_with_bits(9.into(), 3, true)); + assert!(!representable_with_bits(&9.into(), 4, true)); + assert!(!representable_with_bits(&9.into(), 3, true)); } #[test] fn negative_number_representable_with_bits_test() { // A negative number will never be representable as an unsigned number, // no matter the number of bits! - assert!(!representable_with_bits(BigInt::from(-9), 1, false)); - assert!(!representable_with_bits(BigInt::from(-9), 500, false)); + assert!(!representable_with_bits(&BigInt::from(-9), 1, false)); + assert!(!representable_with_bits(&BigInt::from(-9), 500, false)); // -9 can be represented in >=5 bits as a signed number. - assert!(representable_with_bits(BigInt::from(-9), 5, true)); - assert!(representable_with_bits(BigInt::from(-9), 6, true)); + assert!(representable_with_bits(&BigInt::from(-9), 5, true)); + assert!(representable_with_bits(&BigInt::from(-9), 6, true)); // But not in <= 4 bits as a signed number. - assert!(!representable_with_bits(BigInt::from(-9), 4, true)); - assert!(!representable_with_bits(BigInt::from(-9), 3, true)); + assert!(!representable_with_bits(&BigInt::from(-9), 4, true)); + assert!(!representable_with_bits(&BigInt::from(-9), 3, true)); + } + + #[test] + fn number_representable_with_sign_test() { + // Sign needs one additional bit + assert!(representable_with_bits(&8.into(), 4, false)); + assert!(!representable_with_bits(&8.into(), 4, true)); + assert!(representable_with_bits(&8.into(), 5, true)); + + // Negative number must be signed + assert!(!representable_with_bits(&BigInt::from(-8), 100, false)); + + // Negative powers of two don't need an extra sign bit + assert!(!representable_with_bits(&BigInt::from(-8), 3, true)); + assert!(representable_with_bits(&BigInt::from(-8), 4, true)); + + assert!(!representable_with_bits(&BigInt::from(-9), 4, true)); + assert!(representable_with_bits(&BigInt::from(-9), 5, true)); } } From b1d961479e9c683c24e25d6fae58b6959671346d Mon Sep 17 00:00:00 2001 From: fruno Date: Thu, 6 Nov 2025 17:19:28 +0100 Subject: [PATCH 2/4] =?UTF-8?q?=E2=9C=A8=20Include=20constant=20int=20segm?= =?UTF-8?q?ents=20in=20interference=20analysis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + compiler-core/Cargo.toml | 2 +- compiler-core/src/exhaustiveness.rs | 253 +++++++++++++++++- ...ttern_after_unreachable_equal_pattern.snap | 28 ++ ...reachable_int_pattern_with_prefix_int.snap | 25 ++ ...int_pattern_with_string_of_same_value.snap | 25 ++ compiler-core/src/type_/tests/warnings.rs | 43 +++ 7 files changed, 364 insertions(+), 13 deletions(-) create mode 100644 compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__reachable_pattern_after_unreachable_equal_pattern.snap create mode 100644 compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_prefix_int.snap create mode 100644 compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_string_of_same_value.snap diff --git a/Cargo.lock b/Cargo.lock index fa5a8a89906..9228d2fb9ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -314,6 +314,7 @@ checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ "funty", "radium", + "serde", "tap", "wyz", ] diff --git a/compiler-core/Cargo.toml b/compiler-core/Cargo.toml index 0e7d9abb0af..7935518dfee 100644 --- a/compiler-core/Cargo.toml +++ b/compiler-core/Cargo.toml @@ -49,7 +49,7 @@ radix_trie = "0.2.1" # Ensuring recursive type-checking doesn't stack overflow stacker = "0.1.21" # Manipulating bit arrays -bitvec = "1" +bitvec = { version = "1", features = ["serde"] } async-trait.workspace = true base16.workspace = true diff --git a/compiler-core/src/exhaustiveness.rs b/compiler-core/src/exhaustiveness.rs index 5f2ddc69d71..2bda2d5eb18 100644 --- a/compiler-core/src/exhaustiveness.rs +++ b/compiler-core/src/exhaustiveness.rs @@ -82,7 +82,7 @@ use crate::{ is_prelude_module, string, }, }; -use bitvec::{order::Msb0, slice::BitSlice, view::BitView}; +use bitvec::{order::Msb0, slice::BitSlice, vec::BitVec, view::BitView}; use ecow::EcoString; use id_arena::{Arena, Id}; use itertools::Itertools; @@ -91,6 +91,7 @@ use num_traits::ToPrimitive; use radix_trie::{Trie, TrieCommon}; use std::{ cell::RefCell, + cmp::Ordering, collections::{HashMap, HashSet, VecDeque}, hash::Hash, sync::Arc, @@ -1213,6 +1214,9 @@ pub enum BitArrayMatchedValue { /// is deemed unreachable: it is the location this literal value comes /// from in the whole pattern. location: SrcSpan, + /// The bits representing the given literal int, with the correct + /// signed- and endianness as specified in the bit array segment. + bits: Result, IntToBitsError>, }, LiteralString { value: EcoString, @@ -1230,6 +1234,12 @@ pub enum BitArrayMatchedValue { } impl BitArrayMatchedValue { + /// This is an arbitrary limit beyond which interference _may_ no longer be done. + /// This is necessary because people may write very silly segments like + /// `<<0:9_007_199_254_740_992>>`, which would allocate around a wee petabyte of memory. + /// Literal strings are already in memory, and thus ignore this limit. + const MAX_BITS_INTERFERENCE: u32 = u16::MAX as u32; + pub(crate) fn is_literal(&self) -> bool { match self { BitArrayMatchedValue::LiteralFloat(_) @@ -1248,12 +1258,11 @@ impl BitArrayMatchedValue { match self { BitArrayMatchedValue::LiteralString { bytes, .. } => Some(bytes.view_bits::()), BitArrayMatchedValue::Assign { value, .. } => value.constant_bits(), + BitArrayMatchedValue::LiteralInt { bits, .. } => bits.as_deref().ok(), // TODO: We could also implement the interfering optimisation for - // literal ints as well, but that will be a bit trickier than - // strings. - BitArrayMatchedValue::LiteralInt { .. } - | BitArrayMatchedValue::LiteralFloat(_) + // literal floats as well, but the usefulness is questionable + BitArrayMatchedValue::LiteralFloat(_) | BitArrayMatchedValue::Variable(_) | BitArrayMatchedValue::Discard(_) => None, } @@ -1268,19 +1277,21 @@ impl BitArrayMatchedValue { ) -> Option { match self { BitArrayMatchedValue::Assign { value, .. } => value.is_impossible_segment(read_action), - BitArrayMatchedValue::LiteralInt { value, location } => { - let size = read_action.size.constant_bits()?.to_u32()?; - if representable_with_bits(value, size, read_action.signed) { - None - } else { + BitArrayMatchedValue::LiteralInt { + value, + location, + bits, + } => match bits { + Err(IntToBitsError::Unrepresentable { size }) => { Some(ImpossibleBitArraySegmentPattern::UnrepresentableInteger { value: value.clone(), - size, + size: *size, location: *location, signed: read_action.signed, }) } - } + _ => None, + }, BitArrayMatchedValue::LiteralFloat(_) | BitArrayMatchedValue::LiteralString { .. } @@ -3520,6 +3531,12 @@ fn segment_matched_value( } => BitArrayMatchedValue::LiteralInt { value: int_value.clone(), location: *location, + bits: int_to_bits( + int_value, + &read_action.size, + read_action.endianness, + read_action.signed, + ), }, ast::Pattern::Float { value, .. } => BitArrayMatchedValue::LiteralFloat(value.clone()), ast::Pattern::String { value, .. } if segment.has_utf16_option() => { @@ -3557,6 +3574,92 @@ fn segment_matched_value( } } +fn int_to_bits( + value: &BigInt, + read_size: &ReadSize, + endianness: Endianness, + signed: bool, +) -> Result, IntToBitsError> { + let size = read_size + .constant_bits() + .ok_or(IntToBitsError::NonConstantSize)? + .to_u32() + .ok_or(IntToBitsError::ExceedsMaximumSize)?; + + if !representable_with_bits(value, size, signed) { + return Err(IntToBitsError::Unrepresentable { size }); + } else if size > BitArrayMatchedValue::MAX_BITS_INTERFERENCE { + return Err(IntToBitsError::ExceedsMaximumSize); + } + + // Pad negative numbers with 1s (true) and non-negative numbers with 0s (false) + let pad_digit = value.sign() == Sign::Minus; + let size = size as usize; + let mut bytes = int_to_bytes(value, endianness, signed); + let bytes_size = bytes.len() * 8; + + // There are 3 cases, which are easier to handle separately by endianness + // If the size of the bigint bytes equals the expected bits, we can return them as-is + // If there are more bits than we need, we need to trim some of the most significant bits. + // E.g. `6:3` yields one byte of which we need to trim the 5 most significant bits. + // Values like 999:3 are illegal and caught by the guard at the start of the function. + // If there are fewer bits than we need, we need to add some. + // E.g. `6:13` yields one byte which we need to pad with 5 bits + let bits = match (endianness, bytes_size.cmp(&size)) { + (_, Ordering::Equal) => BitVec::from_vec(bytes), + + (Endianness::Big, Ordering::Greater) => { + BitVec::from_bitslice(&bytes.view_bits()[bytes_size - size..]) + } + (Endianness::Big, Ordering::Less) => { + let mut bits = BitVec::repeat(pad_digit, size - bytes_size); + bits.extend_from_raw_slice(&bytes); + bits + } + + (Endianness::Little, Ordering::Greater) => { + // If the difference is greater than a byte, we returned an Error earlier + let remainder = size % 8; + if remainder == 0 { + BitVec::from_vec(bytes) + } else { + // If the size is not a multiple of 8, we need to truncate the most significant bits. + // As they are in the last byte, we leftshift by the appropriate amount and + // truncate the final bits after conversion + let last_byte = bytes.last_mut().expect("bytes must not be empty"); + *last_byte <<= 8 - remainder; + + let mut bits = BitVec::from_vec(bytes); + bits.truncate(size); + bits + } + } + (Endianness::Little, Ordering::Less) => { + let mut bits = BitVec::from_vec(bytes); + let padding: BitVec = BitVec::repeat(pad_digit, size - bytes_size); + bits.extend_from_bitslice(padding.as_bitslice()); + bits + } + }; + Ok(bits) +} + +fn int_to_bytes(value: &BigInt, endianness: Endianness, signed: bool) -> Vec { + match (endianness, signed) { + (Endianness::Big, false) => value.to_bytes_be().1, + (Endianness::Big, true) => value.to_signed_bytes_be(), + (Endianness::Little, false) => value.to_bytes_le().1, + (Endianness::Little, true) => value.to_signed_bytes_le(), + } +} + +#[derive(Clone, Copy, Eq, PartialEq, Debug, serde::Serialize, serde::Deserialize)] +pub enum IntToBitsError { + Unrepresentable { size: u32 }, + ExceedsMaximumSize, + NonConstantSize, +} + fn segment_size( segment: &TypedPatternBitArraySegment, pattern_variables: &HashMap, @@ -3761,3 +3864,129 @@ mod representable_with_bits_test { assert!(representable_with_bits(&BigInt::from(-9), 5, true)); } } + +#[cfg(test)] +mod int_to_bits_test { + use std::assert_eq; + + use crate::{ + ast::Endianness, + exhaustiveness::{BitArrayMatchedValue, IntToBitsError, ReadSize, int_to_bits}, + }; + use bitvec::{bitvec, order::Msb0, vec::BitVec}; + use num_bigint::BigInt; + + fn read_size(size: u32) -> ReadSize { + ReadSize::ConstantBits(BigInt::from(size)) + } + + #[test] + fn int_to_bits_size_too_big() { + assert_eq!( + int_to_bits( + &BigInt::ZERO, + &read_size(BitArrayMatchedValue::MAX_BITS_INTERFERENCE + 1), + Endianness::Big, + true, + ), + Err(IntToBitsError::ExceedsMaximumSize), + ); + } + + #[test] + fn int_to_bits_zero() { + let expect = Ok(bitvec![u8, Msb0; 0; 3]); + assert_eq!( + int_to_bits(&BigInt::ZERO, &read_size(3), Endianness::Big, false), + expect + ); + assert_eq!( + int_to_bits(&BigInt::ZERO, &read_size(3), Endianness::Little, false), + expect + ); + + let expect = Ok(bitvec![u8, Msb0; 0; 10]); + assert_eq!( + int_to_bits(&BigInt::ZERO, &read_size(10), Endianness::Big, false), + expect + ); + assert_eq!( + int_to_bits(&BigInt::ZERO, &read_size(10), Endianness::Little, false), + expect + ); + } + + #[test] + fn int_to_bits_positive() { + // Exact match + assert_eq!( + int_to_bits( + &BigInt::from(0xff00), + &read_size(16), + Endianness::Big, + false + ), + Ok(BitVec::::from_vec(vec![0xff, 0x00])), + ); + assert_eq!( + int_to_bits( + &BigInt::from(0xff00), + &read_size(16), + Endianness::Little, + false + ), + Ok(BitVec::::from_vec(vec![0x00, 0xff])), + ); + + assert_eq!( + int_to_bits( + &BigInt::from(0b11_1111_0000), + &read_size(10), + Endianness::Big, + false + ), + Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]), + ); + assert_eq!( + int_to_bits( + &BigInt::from(0b11_1111_0000), + &read_size(10), + Endianness::Little, + false + ), + Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 0, 0, 0, 0, 1, 1]), + ); + + // Too few bits in int + assert_eq!( + int_to_bits(&BigInt::from(0xff), &read_size(12), Endianness::Big, false), + Ok(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]), + ); + assert_eq!( + int_to_bits( + &BigInt::from(0xff), + &read_size(12), + Endianness::Little, + false + ), + Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]), + ); + } + + #[test] + fn int_to_bits_signed() { + assert_eq!( + int_to_bits(&BigInt::from(-128), &read_size(12), Endianness::Big, true), + Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]), + ); + assert_eq!( + int_to_bits( + &BigInt::from(-128), + &read_size(12), + Endianness::Little, + true + ), + Ok(bitvec![u8, Msb0; 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]), + ); + } +} diff --git a/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__reachable_pattern_after_unreachable_equal_pattern.snap b/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__reachable_pattern_after_unreachable_equal_pattern.snap new file mode 100644 index 00000000000..420760ca365 --- /dev/null +++ b/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__reachable_pattern_after_unreachable_equal_pattern.snap @@ -0,0 +1,28 @@ +--- +source: compiler-core/src/type_/tests/warnings.rs +expression: "\npub fn wibble(bits) {\n case bits {\n <<97:3>> -> 1\n <<\"a\">> -> 2\n _ -> 3\n }\n}\n" +--- +----- SOURCE CODE + +pub fn wibble(bits) { + case bits { + <<97:3>> -> 1 + <<"a">> -> 2 + _ -> 3 + } +} + + +----- WARNING +warning: Unreachable pattern + ┌─ /src/warning/wrn.gleam:4:5 + │ +4 │ <<97:3>> -> 1 + │ ^^^^^^^^ + │ │ + │ A 3 bits unsigned integer will never match this value + +This pattern cannot be reached as it contains segments that will never +match. + +Hint: It can be safely removed. diff --git a/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_prefix_int.snap b/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_prefix_int.snap new file mode 100644 index 00000000000..993ecd325e0 --- /dev/null +++ b/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_prefix_int.snap @@ -0,0 +1,25 @@ +--- +source: compiler-core/src/type_/tests/warnings.rs +expression: "\npub fn wibble(bits) {\n case bits {\n <<0b1:1, _:1>> -> 1\n <<0b11:2>> -> 2\n _ -> 3\n }\n}" +--- +----- SOURCE CODE + +pub fn wibble(bits) { + case bits { + <<0b1:1, _:1>> -> 1 + <<0b11:2>> -> 2 + _ -> 3 + } +} + +----- WARNING +warning: Unreachable pattern + ┌─ /src/warning/wrn.gleam:5:5 + │ +5 │ <<0b11:2>> -> 2 + │ ^^^^^^^^^^ + +This pattern cannot be reached as a previous pattern matches the same +values. + +Hint: It can be safely removed. diff --git a/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_string_of_same_value.snap b/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_string_of_same_value.snap new file mode 100644 index 00000000000..875bcb7e709 --- /dev/null +++ b/compiler-core/src/type_/tests/snapshots/gleam_core__type___tests__warnings__unreachable_int_pattern_with_string_of_same_value.snap @@ -0,0 +1,25 @@ +--- +source: compiler-core/src/type_/tests/warnings.rs +expression: "\npub fn wibble(bits) {\n case bits {\n <<\"a\">> -> 1\n <<97>> -> 2\n _ -> 3\n }\n}" +--- +----- SOURCE CODE + +pub fn wibble(bits) { + case bits { + <<"a">> -> 1 + <<97>> -> 2 + _ -> 3 + } +} + +----- WARNING +warning: Unreachable pattern + ┌─ /src/warning/wrn.gleam:5:5 + │ +5 │ <<97>> -> 2 + │ ^^^^^^ + +This pattern cannot be reached as a previous pattern matches the same +values. + +Hint: It can be safely removed. diff --git a/compiler-core/src/type_/tests/warnings.rs b/compiler-core/src/type_/tests/warnings.rs index 69cba489564..0c17b9d9125 100644 --- a/compiler-core/src/type_/tests/warnings.rs +++ b/compiler-core/src/type_/tests/warnings.rs @@ -4515,6 +4515,49 @@ pub fn wibble(bits) { ); } +#[test] +fn unreachable_int_pattern_with_string_of_same_value() { + assert_warning!( + r#" +pub fn wibble(bits) { + case bits { + <<"a">> -> 1 + <<97>> -> 2 + _ -> 3 + } +}"# + ); +} + +#[test] +fn unreachable_int_pattern_with_prefix_int() { + assert_warning!( + r#" +pub fn wibble(bits) { + case bits { + <<0b1:1, _:1>> -> 1 + <<0b11:2>> -> 2 + _ -> 3 + } +}"# + ); +} + +#[test] +fn reachable_pattern_after_unreachable_equal_pattern() { + assert_warning!( + r#" +pub fn wibble(bits) { + case bits { + <<97:3>> -> 1 + <<"a">> -> 2 + _ -> 3 + } +} +"# + ); +} + #[test] fn unused_recursive_function_argument() { assert_warning!( From 8828b866bf71dd8ec43fefb15115d3fcb64b14e7 Mon Sep 17 00:00:00 2001 From: fruno Date: Thu, 6 Nov 2025 23:49:03 +0100 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=93=9D=20Update=20changelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 172b49e1a0b..7e1a1252ff3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,26 @@ ([Surya Rose](https://github.com/GearsDatapacks)) +- The [interference-based pruning](https://gleam.run/news/formalising-external-apis/#Improved-bit-array-exhaustiveness-checking) + from 1.13 has been extended to int segments! + Aside from the various performance improvements, this allows the compiler to + mark more branches as unreachable. + ```gleam + case bits { + <<"a">> -> 0 + <<97>> -> 1 + // ^- This branch is unreachable because it's equal to "a". + + <<0b1:1, _:1>> -> 2 + <<0b11:2>> -> 3 + // ^- This branch is unreachable because the branch before it already covers it. + + _ -> 99 + } + ``` + ([fruno](https://github.com/fruno-bulax/)) +>>>>>>> 956f7802d (📝 Update changelog) + ### Build tool - The help text displayed by `gleam dev --help`, `gleam test --help`, and From f75ac8285b7536209f2413716274749991bb4182 Mon Sep 17 00:00:00 2001 From: fruno Date: Mon, 10 Nov 2025 16:17:15 +0100 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=8E=A8=20Improve=20code=20clarity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- compiler-core/src/exhaustiveness.rs | 69 ++++++++++++++++++----------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/compiler-core/src/exhaustiveness.rs b/compiler-core/src/exhaustiveness.rs index 2bda2d5eb18..a2dd476a87d 100644 --- a/compiler-core/src/exhaustiveness.rs +++ b/compiler-core/src/exhaustiveness.rs @@ -3598,19 +3598,17 @@ fn int_to_bits( let mut bytes = int_to_bytes(value, endianness, signed); let bytes_size = bytes.len() * 8; - // There are 3 cases, which are easier to handle separately by endianness - // If the size of the bigint bytes equals the expected bits, we can return them as-is - // If there are more bits than we need, we need to trim some of the most significant bits. - // E.g. `6:3` yields one byte of which we need to trim the 5 most significant bits. - // Values like 999:3 are illegal and caught by the guard at the start of the function. - // If there are fewer bits than we need, we need to add some. - // E.g. `6:13` yields one byte which we need to pad with 5 bits let bits = match (endianness, bytes_size.cmp(&size)) { (_, Ordering::Equal) => BitVec::from_vec(bytes), + // Even though we return an error if `value` cannot be represented in `size` bits, + // we may need to slice off up a couple bits if the read size is not a multiple of 8. + // <3:4> yields the bytes [0b0000_0011]. We slice off 4 bits and get 0011 (Endianness::Big, Ordering::Greater) => { BitVec::from_bitslice(&bytes.view_bits()[bytes_size - size..]) } + // If the number needs fewer bits than the read size, we pad it. + // <3:9> yields the bytes [0b0000_0011]. We add one digit and get 0_0000_0011 (Endianness::Big, Ordering::Less) => { let mut bits = BitVec::repeat(pad_digit, size - bytes_size); bits.extend_from_raw_slice(&bytes); @@ -3782,29 +3780,36 @@ fn representable_with_bits(value: &BigInt, bits: u32, signed: bool) -> bool { if bits == 0 { return false; }; - let is_negative = value.sign() == Sign::Minus; - if is_negative && !signed { - return false; - } - let bits_unsigned = value.bits(); - let required_bits = if !signed { - bits_unsigned - } else if !is_negative { - // The minimal representation of an unsigned number always has 1 as its most - // significant bit and thus always needs an extra bit for the sign. - bits_unsigned + 1 - } else if let Some(trailing_zeros) = value.trailing_zeros() { - // Negative powers of two don't need an extra sign bit. E.g. `-2 == 0b10` - let is_power_of_2 = trailing_zeros == bits_unsigned - 1; - bits_unsigned + (!is_power_of_2 as u64) - } else { - // The value is 0 - 1 + let required_bits = match (value.sign(), signed) { + // Zero always needs one bit. + (Sign::NoSign, _) => 1, + + // `BigInt::bits` does not consider the sign! + (Sign::Plus, false) => value.bits(), + // Therefore we need to add the sign bit here: `10` -> `010` + (Sign::Plus, true) => value.bits() + 1, + + // A negative number must be signed + (Sign::Minus, false) => return false, + (Sign::Minus, true) => { + let bits_unsigned = value.bits(); + let trailing_zeros = value + .trailing_zeros() + .expect("trailing_zeros to return a value for non-zero numbers"); + let is_power_of_2 = trailing_zeros == bits_unsigned - 1; + + // Negative powers of two don't need an extra sign bit. E.g. `-2 == 0b10` + if is_power_of_2 { + bits_unsigned + } else { + bits_unsigned + 1 + } + } }; match required_bits.to_u32() { - Some(r) => bits >= r, + Some(required_bits) => bits >= required_bits, None => false, } } @@ -3846,6 +3851,18 @@ mod representable_with_bits_test { assert!(!representable_with_bits(&BigInt::from(-9), 3, true)); } + #[test] + fn zero_representable_with_bits_test() { + for i in 0..12 { + println!("{i}: {}", BigInt::from(i).bits()); + } + assert!(!representable_with_bits(&BigInt::ZERO, 0, false)); + assert!(!representable_with_bits(&BigInt::ZERO, 0, true)); + + assert!(representable_with_bits(&BigInt::ZERO, 1, false)); + assert!(representable_with_bits(&BigInt::ZERO, 1, true)); + } + #[test] fn number_representable_with_sign_test() { // Sign needs one additional bit