Skip to content

Commit 79e0ea8

Browse files
committed
✨ Include constant int segments in interference analysis
1 parent 4b8758c commit 79e0ea8

File tree

6 files changed

+272
-8
lines changed

6 files changed

+272
-8
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

compiler-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ radix_trie = "0.2.1"
4949
# Ensuring recursive type-checking doesn't stack overflow
5050
stacker = "0.1.21"
5151
# Manipulating bit arrays
52-
bitvec = "1"
52+
bitvec = { version = "1", features = ["serde"] }
5353

5454
async-trait.workspace = true
5555
base16.workspace = true

compiler-core/src/exhaustiveness.rs

Lines changed: 192 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,15 +81,16 @@ use crate::{
8181
is_prelude_module, string,
8282
},
8383
};
84-
use bitvec::{order::Msb0, slice::BitSlice, view::BitView};
84+
use bitvec::{order::Msb0, slice::BitSlice, vec::BitVec, view::BitView};
8585
use ecow::EcoString;
8686
use id_arena::{Arena, Id};
8787
use itertools::Itertools;
88-
use num_bigint::BigInt;
88+
use num_bigint::{BigInt, Sign};
8989
use num_traits::ToPrimitive;
9090
use radix_trie::{Trie, TrieCommon};
9191
use std::{
9292
cell::RefCell,
93+
cmp::Ordering,
9394
collections::{HashMap, HashSet, VecDeque},
9495
hash::Hash,
9596
sync::Arc,
@@ -1212,6 +1213,11 @@ pub enum BitArrayMatchedValue {
12121213
/// is deemed unreachable: it is the location this literal value comes
12131214
/// from in the whole pattern.
12141215
location: SrcSpan,
1216+
/// The bits representing the given literal integer, with the correct
1217+
/// signed- and endianness as specified in the bit array segment.
1218+
/// Present iff the segment has a constant, non-zero size that is not greater than
1219+
/// [BitArrayMatchedValue::MAX_BITS_INTERFERENCE]
1220+
bits: Option<BitVec<u8, Msb0>>,
12151221
},
12161222
LiteralString {
12171223
value: EcoString,
@@ -1229,6 +1235,12 @@ pub enum BitArrayMatchedValue {
12291235
}
12301236

12311237
impl BitArrayMatchedValue {
1238+
/// This is an arbitrary limit, in bits, beyond which literal int segments are left out of the interference analysis.
1239+
/// This is necessary because people may write very silly segments like
1240+
/// `<<0:9_007_199_254_740_992>>`, which would allocate around a wee petabyte of memory.
1241+
/// Literal strings are already in memory, and thus ignore this limit.
1242+
const MAX_BITS_INTERFERENCE: usize = u16::MAX as usize;
1243+
12321244
pub(crate) fn is_literal(&self) -> bool {
12331245
match self {
12341246
BitArrayMatchedValue::LiteralFloat(_)
@@ -1247,12 +1259,11 @@ impl BitArrayMatchedValue {
12471259
match self {
12481260
BitArrayMatchedValue::LiteralString { bytes, .. } => Some(bytes.view_bits::<Msb0>()),
12491261
BitArrayMatchedValue::Assign { value, .. } => value.constant_bits(),
1262+
BitArrayMatchedValue::LiteralInt { bits, .. } => bits.as_deref(),
12501263

12511264
// TODO: We could also implement the interfering optimisation for
1252-
// literal ints as well, but that will be a bit trickier than
1253-
// strings.
1254-
BitArrayMatchedValue::LiteralInt { .. }
1255-
| BitArrayMatchedValue::LiteralFloat(_)
1265+
// literal floats as well, but the usefulness is questionable
1266+
BitArrayMatchedValue::LiteralFloat(_)
12561267
| BitArrayMatchedValue::Variable(_)
12571268
| BitArrayMatchedValue::Discard(_) => None,
12581269
}
@@ -1267,7 +1278,9 @@ impl BitArrayMatchedValue {
12671278
) -> Option<ImpossibleBitArraySegmentPattern> {
12681279
match self {
12691280
BitArrayMatchedValue::Assign { value, .. } => value.is_impossible_segment(read_action),
1270-
BitArrayMatchedValue::LiteralInt { value, location } => {
1281+
BitArrayMatchedValue::LiteralInt {
1282+
value, location, ..
1283+
} => {
12711284
let size = read_action.size.constant_bits()?.to_u32()?;
12721285
if representable_with_bits(value.clone(), size, read_action.signed) {
12731286
None
@@ -3521,6 +3534,7 @@ fn segment_matched_value(
35213534
} => BitArrayMatchedValue::LiteralInt {
35223535
value: int_value.clone(),
35233536
location: *location,
3537+
bits: int_to_bits(int_value, &read_action.size, read_action.endianness),
35243538
},
35253539
ast::Pattern::Float { value, .. } => BitArrayMatchedValue::LiteralFloat(value.clone()),
35263540
ast::Pattern::String { value, .. } if segment.has_utf16_option() => {
@@ -3558,6 +3572,80 @@ fn segment_matched_value(
35583572
}
35593573
}
35603574

3575+
/// Convert a BigInt to its bitarray representation.
3576+
/// Returns `None` if the read size is not constant, or if the size is larger than
3577+
/// [BitArrayMatchedValue::MAX_BITS_INTERFERENCE]
3578+
fn int_to_bits(
3579+
value: &BigInt,
3580+
read_size: &ReadSize,
3581+
endianness: Endianness,
3582+
) -> Option<BitVec<u8, Msb0>> {
3583+
let size = read_size.constant_bits()?.to_usize()?;
3584+
if size == 0 || size > BitArrayMatchedValue::MAX_BITS_INTERFERENCE {
3585+
return None;
3586+
}
3587+
// Pad negative numbers with 1s (true) and non-negative numbers with 0s (false)
3588+
let pad_digit = value.sign() == Sign::Minus;
3589+
3590+
// There are 3 cases, which are easier to handle separately by endianness
3591+
// If the size of the bigint bytes equals the expected bits, we can return them as-is
3592+
// If there are more bits than we need, we need to trim some of the most significant bits.
3593+
// E.g. `6:3` yields one byte of which we need to trim the 5 most significant bits.
3594+
// If there are fewer bits than we need, we need to add some.
3595+
// E.g. `6:13` yields one byte which we need to pad with 5 bits
3596+
let bits = match endianness {
3597+
// Big endian is easier to work with as we can simply trim or pad the front
3598+
Endianness::Big => {
3599+
let bytes = value.to_signed_bytes_be();
3600+
let bytes_size = bytes.len() * 8;
3601+
match bytes_size.cmp(&size) {
3602+
Ordering::Greater => BitVec::from_bitslice(&bytes.view_bits()[bytes_size - size..]),
3603+
Ordering::Less => {
3604+
let mut bits = BitVec::repeat(pad_digit, size - bytes_size);
3605+
bits.extend_from_raw_slice(&bytes);
3606+
bits
3607+
}
3608+
Ordering::Equal => BitVec::from_vec(bytes),
3609+
}
3610+
}
3611+
Endianness::Little => {
3612+
let mut bytes = value.to_signed_bytes_le();
3613+
let bytes_size = bytes.len() * 8;
3614+
match bytes_size.cmp(&size) {
3615+
Ordering::Greater => {
3616+
// First truncate the trailing most significant *bytes*
3617+
// E.g. truncate 0xffffff:10 to two bytes (0xffff)
3618+
let required_bytes = size.div_ceil(8);
3619+
bytes.truncate(required_bytes);
3620+
3621+
let remainder = size % 8;
3622+
if remainder == 0 {
3623+
BitVec::from_vec(bytes)
3624+
} else {
3625+
// If the size is not a multiple of 8, we need to truncate the most significant bits
3626+
// As they are in the last byte, we leftshift by the appropriate amount and
3627+
// truncate the final bits after conversion
3628+
let last_byte = bytes.last_mut().expect("bytes must not be empty");
3629+
*last_byte <<= 8 - remainder;
3630+
3631+
let mut bits = BitVec::from_vec(bytes);
3632+
bits.truncate(size);
3633+
bits
3634+
}
3635+
}
3636+
Ordering::Less => {
3637+
let mut bits = BitVec::from_vec(bytes);
3638+
let padding: BitVec<u8, Msb0> = BitVec::repeat(pad_digit, size - bytes_size);
3639+
bits.extend_from_bitslice(padding.as_bitslice());
3640+
bits
3641+
}
3642+
Ordering::Equal => BitVec::from_vec(bytes),
3643+
}
3644+
}
3645+
};
3646+
Some(bits)
3647+
}
3648+
35613649
fn segment_size(
35623650
segment: &TypedPatternBitArraySegment,
35633651
pattern_variables: &HashMap<EcoString, ReadAction>,
@@ -3727,3 +3815,100 @@ mod representable_with_bits_test {
37273815
assert!(!representable_with_bits(BigInt::from(-9), 3, true));
37283816
}
37293817
}
3818+
3819+
#[cfg(test)]
mod int_to_bits_test {
    //! Unit tests for `int_to_bits`, covering every size relation (value
    //! encoding equal to, wider than and narrower than the segment) for both
    //! endiannesses and both signs, plus the size limits.

    use std::assert_eq;

    use crate::{
        ast::Endianness,
        exhaustiveness::{BitArrayMatchedValue, ReadSize, int_to_bits},
    };
    use bitvec::{bitvec, order::Msb0, vec::BitVec};
    use num_bigint::BigInt;

    /// Builds a constant read size of `size` bits.
    fn read_size(size: usize) -> ReadSize {
        ReadSize::ConstantBits(BigInt::from(size))
    }

    #[test]
    fn int_to_bits_size_too_big() {
        assert!(
            int_to_bits(
                &BigInt::ZERO,
                &read_size(BitArrayMatchedValue::MAX_BITS_INTERFERENCE + 1),
                Endianness::Big
            )
            .is_none()
        );
    }

    #[test]
    fn int_to_bits_size_at_limit() {
        // The limit itself is still accepted.
        let limit = BitArrayMatchedValue::MAX_BITS_INTERFERENCE;
        assert_eq!(
            int_to_bits(&BigInt::ZERO, &read_size(limit), Endianness::Big).map(|bits| bits.len()),
            Some(limit)
        );
        assert_eq!(
            int_to_bits(&BigInt::ZERO, &read_size(limit), Endianness::Little)
                .map(|bits| bits.len()),
            Some(limit)
        );
    }

    #[test]
    fn int_to_bits_size_zero() {
        // A zero-sized segment carries no bits to compare.
        assert!(int_to_bits(&BigInt::ZERO, &read_size(0), Endianness::Big).is_none());
        assert!(int_to_bits(&BigInt::from(1), &read_size(0), Endianness::Little).is_none());
    }

    #[test]
    fn int_to_bits_zero() {
        let expect = Some(bitvec![u8, Msb0; 0; 3]);
        assert_eq!(
            int_to_bits(&BigInt::ZERO, &read_size(3), Endianness::Big),
            expect
        );
        assert_eq!(
            int_to_bits(&BigInt::ZERO, &read_size(3), Endianness::Little),
            expect
        );

        let expect = Some(bitvec![u8, Msb0; 0; 10]);
        assert_eq!(
            int_to_bits(&BigInt::ZERO, &read_size(10), Endianness::Big),
            expect
        );
        assert_eq!(
            int_to_bits(&BigInt::ZERO, &read_size(10), Endianness::Little),
            expect
        );
    }

    #[test]
    fn int_to_bits_positive() {
        // Exact match
        assert_eq!(
            int_to_bits(&BigInt::from(0xff00), &read_size(16), Endianness::Big),
            Some(BitVec::<u8, Msb0>::from_vec(vec![0xff, 0x00])),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(0xff00), &read_size(16), Endianness::Little),
            Some(BitVec::<u8, Msb0>::from_vec(vec![0x00, 0xff])),
        );

        // Too many bits in int
        assert_eq!(
            int_to_bits(&BigInt::from(0xff00ff), &read_size(12), Endianness::Big),
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(0xff00ff), &read_size(12), Endianness::Little),
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(6), &read_size(3), Endianness::Big),
            Some(bitvec![u8, Msb0; 1, 1, 0]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(6), &read_size(3), Endianness::Little),
            Some(bitvec![u8, Msb0; 1, 1, 0]),
        );

        // The value fits in 8 bits, but its signed encoding carries an extra
        // sign byte, so this still goes through the truncating path
        assert_eq!(
            int_to_bits(&BigInt::from(0xff), &read_size(12), Endianness::Big),
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(0xff), &read_size(12), Endianness::Little),
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
        );

        // Too few bits in int: the encoding is a single byte and has to be
        // extended with zeros
        assert_eq!(
            int_to_bits(&BigInt::from(0x7f), &read_size(12), Endianness::Big),
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(0x7f), &read_size(12), Endianness::Little),
            Some(bitvec![u8, Msb0; 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(6), &read_size(13), Endianness::Big),
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(6), &read_size(13), Endianness::Little),
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]),
        );
    }

    #[test]
    fn int_to_bits_negative() {
        // Exact match
        assert_eq!(
            int_to_bits(&BigInt::from(-256), &read_size(16), Endianness::Big),
            Some(BitVec::<u8, Msb0>::from_vec(vec![0xff, 0x00])),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(-256), &read_size(16), Endianness::Little),
            Some(BitVec::<u8, Msb0>::from_vec(vec![0x00, 0xff])),
        );

        // Too many bits in int
        assert_eq!(
            int_to_bits(&BigInt::from(-256), &read_size(12), Endianness::Big),
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(-256), &read_size(12), Endianness::Little),
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(-3), &read_size(2), Endianness::Big),
            Some(bitvec![u8, Msb0; 0, 1]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(-3), &read_size(2), Endianness::Little),
            Some(bitvec![u8, Msb0; 0, 1]),
        );

        // Too few bits in int
        assert_eq!(
            int_to_bits(&BigInt::from(-128), &read_size(12), Endianness::Big),
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]),
        );
        assert_eq!(
            int_to_bits(&BigInt::from(-128), &read_size(12), Endianness::Little),
            Some(bitvec![u8, Msb0; 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]),
        );
    }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
---
2+
source: compiler-core/src/type_/tests/warnings.rs
3+
expression: "\npub fn wibble(bits) {\n case bits {\n <<0b1:1, _:1>> -> 1\n <<0b11:2>> -> 2\n _ -> 3\n }\n}"
4+
---
5+
----- SOURCE CODE
6+
7+
pub fn wibble(bits) {
8+
case bits {
9+
<<0b1:1, _:1>> -> 1
10+
<<0b11:2>> -> 2
11+
_ -> 3
12+
}
13+
}
14+
15+
----- WARNING
16+
warning: Unreachable pattern
17+
┌─ /src/warning/wrn.gleam:5:5
18+
19+
5<<0b11:2>> -> 2
20+
^^^^^^^^^^
21+
22+
This pattern cannot be reached as a previous pattern matches the same
23+
values.
24+
25+
Hint: It can be safely removed.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
---
2+
source: compiler-core/src/type_/tests/warnings.rs
3+
expression: "\npub fn wibble(bits) {\n case bits {\n <<\"a\">> -> 1\n <<97>> -> 2\n _ -> 3\n }\n}"
4+
---
5+
----- SOURCE CODE
6+
7+
pub fn wibble(bits) {
8+
case bits {
9+
<<"a">> -> 1
10+
<<97>> -> 2
11+
_ -> 3
12+
}
13+
}
14+
15+
----- WARNING
16+
warning: Unreachable pattern
17+
┌─ /src/warning/wrn.gleam:5:5
18+
19+
5<<97>> -> 2
20+
^^^^^^
21+
22+
This pattern cannot be reached as a previous pattern matches the same
23+
values.
24+
25+
Hint: It can be safely removed.

compiler-core/src/type_/tests/warnings.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4515,6 +4515,34 @@ pub fn wibble(bits) {
45154515
);
45164516
}
45174517

4518+
// Snapshot test: the literal int segment `<<97>>` comes after the string
// segment `<<"a">>`. The recorded snapshot reports `<<97>>` as an unreachable
// pattern because a previous pattern matches the same values.
#[test]
4519+
fn unreachable_int_pattern_with_string_of_same_value() {
4520+
assert_warning!(
4521+
r#"
4522+
pub fn wibble(bits) {
4523+
case bits {
4524+
<<"a">> -> 1
4525+
<<97>> -> 2
4526+
_ -> 3
4527+
}
4528+
}"#
4529+
);
4530+
}
4531+
4532+
// Snapshot test: `<<0b11:2>>` comes after `<<0b1:1, _:1>>`, whose first bit is
// the same literal and whose second bit is discarded. The recorded snapshot
// reports `<<0b11:2>>` as an unreachable pattern.
#[test]
4533+
fn unreachable_int_pattern_with_prefix_int() {
4534+
assert_warning!(
4535+
r#"
4536+
pub fn wibble(bits) {
4537+
case bits {
4538+
<<0b1:1, _:1>> -> 1
4539+
<<0b11:2>> -> 2
4540+
_ -> 3
4541+
}
4542+
}"#
4543+
);
4544+
}
4545+
45184546
#[test]
45194547
fn unused_recursive_function_argument() {
45204548
assert_warning!(

0 commit comments

Comments
 (0)