Skip to content

Commit 475f491

Browse files
committed
✨ Include constant int segments in interference analysis
1 parent 315a23a commit 475f491

7 files changed

+364
-13
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

compiler-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ radix_trie = "0.2.1"
4949
# Ensuring recursive type-checking doesn't stack overflow
5050
stacker = "0.1.21"
5151
# Manipulating bit arrays
52-
bitvec = "1"
52+
bitvec = { version = "1", features = ["serde"] }
5353

5454
async-trait.workspace = true
5555
base16.workspace = true

compiler-core/src/exhaustiveness.rs

Lines changed: 241 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ use crate::{
8181
is_prelude_module, string,
8282
},
8383
};
84-
use bitvec::{order::Msb0, slice::BitSlice, view::BitView};
84+
use bitvec::{order::Msb0, slice::BitSlice, vec::BitVec, view::BitView};
8585
use ecow::EcoString;
8686
use id_arena::{Arena, Id};
8787
use itertools::Itertools;
@@ -90,6 +90,7 @@ use num_traits::ToPrimitive;
9090
use radix_trie::{Trie, TrieCommon};
9191
use std::{
9292
cell::RefCell,
93+
cmp::Ordering,
9394
collections::{HashMap, HashSet, VecDeque},
9495
hash::Hash,
9596
sync::Arc,
@@ -1212,6 +1213,9 @@ pub enum BitArrayMatchedValue {
12121213
/// is deemed unreachable: it is the location this literal value comes
12131214
/// from in the whole pattern.
12141215
location: SrcSpan,
1216+
/// The bits representing the given literal integer, with the correct
1217+
/// signedness and endianness as specified in the bit array segment.
1218+
bits: Result<BitVec<u8, Msb0>, IntToBitsError>,
12151219
},
12161220
LiteralString {
12171221
value: EcoString,
@@ -1229,6 +1233,12 @@ pub enum BitArrayMatchedValue {
12291233
}
12301234

12311235
impl BitArrayMatchedValue {
1236+
/// This is an arbitrary limit, in bits, beyond which a literal integer segment is no longer included in interference analysis.
1237+
/// This is necessary because people may write very silly segments like
1238+
/// `<<0:9_007_199_254_740_992>>`, which would allocate around a wee petabyte of memory.
1239+
/// Literal strings are already in memory, and thus ignore this limit.
1240+
const MAX_BITS_INTERFERENCE: u32 = u16::MAX as u32;
1241+
12321242
pub(crate) fn is_literal(&self) -> bool {
12331243
match self {
12341244
BitArrayMatchedValue::LiteralFloat(_)
@@ -1247,12 +1257,11 @@ impl BitArrayMatchedValue {
12471257
match self {
12481258
BitArrayMatchedValue::LiteralString { bytes, .. } => Some(bytes.view_bits::<Msb0>()),
12491259
BitArrayMatchedValue::Assign { value, .. } => value.constant_bits(),
1260+
BitArrayMatchedValue::LiteralInt { bits, .. } => bits.as_deref().ok(),
12501261

12511262
// TODO: We could also implement the interfering optimisation for
1252-
// literal ints as well, but that will be a bit trickier than
1253-
// strings.
1254-
BitArrayMatchedValue::LiteralInt { .. }
1255-
| BitArrayMatchedValue::LiteralFloat(_)
1263+
// literal floats as well, but the usefulness is questionable
1264+
BitArrayMatchedValue::LiteralFloat(_)
12561265
| BitArrayMatchedValue::Variable(_)
12571266
| BitArrayMatchedValue::Discard(_) => None,
12581267
}
@@ -1267,19 +1276,21 @@ impl BitArrayMatchedValue {
12671276
) -> Option<ImpossibleBitArraySegmentPattern> {
12681277
match self {
12691278
BitArrayMatchedValue::Assign { value, .. } => value.is_impossible_segment(read_action),
1270-
BitArrayMatchedValue::LiteralInt { value, location } => {
1271-
let size = read_action.size.constant_bits()?.to_u32()?;
1272-
if representable_with_bits(value, size, read_action.signed) {
1273-
None
1274-
} else {
1279+
BitArrayMatchedValue::LiteralInt {
1280+
value,
1281+
location,
1282+
bits,
1283+
} => match bits {
1284+
Err(IntToBitsError::Unrepresentable { size }) => {
12751285
Some(ImpossibleBitArraySegmentPattern::UnrepresentableInteger {
12761286
value: value.clone(),
1277-
size,
1287+
size: *size,
12781288
location: *location,
12791289
signed: read_action.signed,
12801290
})
12811291
}
1282-
}
1292+
_ => None,
1293+
},
12831294

12841295
BitArrayMatchedValue::LiteralFloat(_)
12851296
| BitArrayMatchedValue::LiteralString { .. }
@@ -3521,6 +3532,12 @@ fn segment_matched_value(
35213532
} => BitArrayMatchedValue::LiteralInt {
35223533
value: int_value.clone(),
35233534
location: *location,
3535+
bits: int_to_bits(
3536+
int_value,
3537+
&read_action.size,
3538+
read_action.endianness,
3539+
read_action.signed,
3540+
),
35243541
},
35253542
ast::Pattern::Float { value, .. } => BitArrayMatchedValue::LiteralFloat(value.clone()),
35263543
ast::Pattern::String { value, .. } if segment.has_utf16_option() => {
@@ -3558,6 +3575,92 @@ fn segment_matched_value(
35583575
}
35593576
}
35603577

3578+
fn int_to_bits(
3579+
value: &BigInt,
3580+
read_size: &ReadSize,
3581+
endianness: Endianness,
3582+
signed: bool,
3583+
) -> Result<BitVec<u8, Msb0>, IntToBitsError> {
3584+
let size = read_size
3585+
.constant_bits()
3586+
.ok_or(IntToBitsError::NonConstantSize)?
3587+
.to_u32()
3588+
.ok_or(IntToBitsError::ExceedsMaximumSize)?;
3589+
3590+
if !representable_with_bits(value, size, signed) {
3591+
return Err(IntToBitsError::Unrepresentable { size });
3592+
} else if size > BitArrayMatchedValue::MAX_BITS_INTERFERENCE {
3593+
return Err(IntToBitsError::ExceedsMaximumSize);
3594+
}
3595+
3596+
// Pad negative numbers with 1s (true) and non-negative numbers with 0s (false)
3597+
let pad_digit = value.sign() == Sign::Minus;
3598+
let size = size as usize;
3599+
let mut bytes = int_to_bytes(value, endianness, signed);
3600+
let bytes_size = bytes.len() * 8;
3601+
3602+
// There are 3 cases, which are easier to handle separately by endianness:
3603+
// - If the size of the bigint bytes equals the expected bits, we can return them as-is.
3604+
// - If there are more bits than we need, we need to trim some of the most significant bits.
3605+
//   E.g. `6:3` yields one byte of which we need to trim the 5 most significant bits.
3606+
//   Values like `999:3` are illegal and caught by the guard at the start of the function.
3607+
// - If there are fewer bits than we need, we need to add some.
3608+
//   E.g. `6:13` yields one byte which we need to pad with 5 bits.
3609+
let bits = match (endianness, bytes_size.cmp(&size)) {
3610+
(_, Ordering::Equal) => BitVec::from_vec(bytes),
3611+
3612+
(Endianness::Big, Ordering::Greater) => {
3613+
BitVec::from_bitslice(&bytes.view_bits()[bytes_size - size..])
3614+
}
3615+
(Endianness::Big, Ordering::Less) => {
3616+
let mut bits = BitVec::repeat(pad_digit, size - bytes_size);
3617+
bits.extend_from_raw_slice(&bytes);
3618+
bits
3619+
}
3620+
3621+
(Endianness::Little, Ordering::Greater) => {
3622+
// If the difference is greater than a byte, we returned an Error earlier
3623+
let remainder = size % 8;
3624+
if remainder == 0 {
3625+
BitVec::from_vec(bytes)
3626+
} else {
3627+
// If the size is not a multiple of 8, we need to truncate the most significant bits.
3628+
// As they are in the last byte, we left-shift by the appropriate amount and
3629+
// truncate the final bits after conversion
3630+
let last_byte = bytes.last_mut().expect("bytes must not be empty");
3631+
*last_byte <<= 8 - remainder;
3632+
3633+
let mut bits = BitVec::from_vec(bytes);
3634+
bits.truncate(size);
3635+
bits
3636+
}
3637+
}
3638+
(Endianness::Little, Ordering::Less) => {
3639+
let mut bits = BitVec::from_vec(bytes);
3640+
let padding: BitVec<u8, Msb0> = BitVec::repeat(pad_digit, size - bytes_size);
3641+
bits.extend_from_bitslice(padding.as_bitslice());
3642+
bits
3643+
}
3644+
};
3645+
Ok(bits)
3646+
}
3647+
3648+
fn int_to_bytes(value: &BigInt, endianness: Endianness, signed: bool) -> Vec<u8> {
3649+
match (endianness, signed) {
3650+
(Endianness::Big, false) => value.to_bytes_be().1,
3651+
(Endianness::Big, true) => value.to_signed_bytes_be(),
3652+
(Endianness::Little, false) => value.to_bytes_le().1,
3653+
(Endianness::Little, true) => value.to_signed_bytes_le(),
3654+
}
3655+
}
3656+
3657+
#[derive(Clone, Copy, Eq, PartialEq, Debug, serde::Serialize, serde::Deserialize)]
3658+
pub enum IntToBitsError {
3659+
Unrepresentable { size: u32 },
3660+
ExceedsMaximumSize,
3661+
NonConstantSize,
3662+
}
3663+
35613664
fn segment_size(
35623665
segment: &TypedPatternBitArraySegment,
35633666
pattern_variables: &HashMap<EcoString, ReadAction>,
@@ -3762,3 +3865,129 @@ mod representable_with_bits_test {
37623865
assert!(representable_with_bits(&BigInt::from(-9), 5, true));
37633866
}
37643867
}
3868+
3869+
#[cfg(test)]
3870+
mod int_to_bits_test {
3871+
use std::assert_eq;
3872+
3873+
use crate::{
3874+
ast::Endianness,
3875+
exhaustiveness::{BitArrayMatchedValue, IntToBitsError, ReadSize, int_to_bits},
3876+
};
3877+
use bitvec::{bitvec, order::Msb0, vec::BitVec};
3878+
use num_bigint::BigInt;
3879+
3880+
fn read_size(size: u32) -> ReadSize {
3881+
ReadSize::ConstantBits(BigInt::from(size))
3882+
}
3883+
3884+
#[test]
3885+
fn int_to_bits_size_too_big() {
3886+
assert_eq!(
3887+
int_to_bits(
3888+
&BigInt::ZERO,
3889+
&read_size(BitArrayMatchedValue::MAX_BITS_INTERFERENCE + 1),
3890+
Endianness::Big,
3891+
true,
3892+
),
3893+
Err(IntToBitsError::ExceedsMaximumSize),
3894+
);
3895+
}
3896+
3897+
#[test]
3898+
fn int_to_bits_zero() {
3899+
let expect = Ok(bitvec![u8, Msb0; 0; 3]);
3900+
assert_eq!(
3901+
int_to_bits(&BigInt::ZERO, &read_size(3), Endianness::Big, false),
3902+
expect
3903+
);
3904+
assert_eq!(
3905+
int_to_bits(&BigInt::ZERO, &read_size(3), Endianness::Little, false),
3906+
expect
3907+
);
3908+
3909+
let expect = Ok(bitvec![u8, Msb0; 0; 10]);
3910+
assert_eq!(
3911+
int_to_bits(&BigInt::ZERO, &read_size(10), Endianness::Big, false),
3912+
expect
3913+
);
3914+
assert_eq!(
3915+
int_to_bits(&BigInt::ZERO, &read_size(10), Endianness::Little, false),
3916+
expect
3917+
);
3918+
}
3919+
3920+
#[test]
3921+
fn int_to_bits_positive() {
3922+
// Exact match
3923+
assert_eq!(
3924+
int_to_bits(
3925+
&BigInt::from(0xff00),
3926+
&read_size(16),
3927+
Endianness::Big,
3928+
false
3929+
),
3930+
Ok(BitVec::<u8, Msb0>::from_vec(vec![0xff, 0x00])),
3931+
);
3932+
assert_eq!(
3933+
int_to_bits(
3934+
&BigInt::from(0xff00),
3935+
&read_size(16),
3936+
Endianness::Little,
3937+
false
3938+
),
3939+
Ok(BitVec::<u8, Msb0>::from_vec(vec![0x00, 0xff])),
3940+
);
3941+
3942+
assert_eq!(
3943+
int_to_bits(
3944+
&BigInt::from(0b11_1111_0000),
3945+
&read_size(10),
3946+
Endianness::Big,
3947+
false
3948+
),
3949+
Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
3950+
);
3951+
assert_eq!(
3952+
int_to_bits(
3953+
&BigInt::from(0b11_1111_0000),
3954+
&read_size(10),
3955+
Endianness::Little,
3956+
false
3957+
),
3958+
Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 0, 0, 0, 0, 1, 1]),
3959+
);
3960+
3961+
// Too few bits in int
3962+
assert_eq!(
3963+
int_to_bits(&BigInt::from(0xff), &read_size(12), Endianness::Big, false),
3964+
Ok(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]),
3965+
);
3966+
assert_eq!(
3967+
int_to_bits(
3968+
&BigInt::from(0xff),
3969+
&read_size(12),
3970+
Endianness::Little,
3971+
false
3972+
),
3973+
Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
3974+
);
3975+
}
3976+
3977+
#[test]
3978+
fn int_to_bits_signed() {
3979+
assert_eq!(
3980+
int_to_bits(&BigInt::from(-128), &read_size(12), Endianness::Big, true),
3981+
Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]),
3982+
);
3983+
assert_eq!(
3984+
int_to_bits(
3985+
&BigInt::from(-128),
3986+
&read_size(12),
3987+
Endianness::Little,
3988+
true
3989+
),
3990+
Ok(bitvec![u8, Msb0; 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]),
3991+
);
3992+
}
3993+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
---
2+
source: compiler-core/src/type_/tests/warnings.rs
3+
expression: "\npub fn wibble(bits) {\n case bits {\n <<97:3>> -> 1\n <<\"a\">> -> 2\n _ -> 3\n }\n}\n"
4+
---
5+
----- SOURCE CODE
6+
7+
pub fn wibble(bits) {
8+
case bits {
9+
<<97:3>> -> 1
10+
<<"a">> -> 2
11+
_ -> 3
12+
}
13+
}
14+
15+
16+
----- WARNING
17+
warning: Unreachable pattern
18+
┌─ /src/warning/wrn.gleam:4:5
19+
20+
4 │ <<97:3>> -> 1
21+
^^^^^^^^
22+
│ │
23+
A 3 bits unsigned integer will never match this value
24+
25+
This pattern cannot be reached as it contains segments that will never
26+
match.
27+
28+
Hint: It can be safely removed.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
---
2+
source: compiler-core/src/type_/tests/warnings.rs
3+
expression: "\npub fn wibble(bits) {\n case bits {\n <<0b1:1, _:1>> -> 1\n <<0b11:2>> -> 2\n _ -> 3\n }\n}"
4+
---
5+
----- SOURCE CODE
6+
7+
pub fn wibble(bits) {
8+
case bits {
9+
<<0b1:1, _:1>> -> 1
10+
<<0b11:2>> -> 2
11+
_ -> 3
12+
}
13+
}
14+
15+
----- WARNING
16+
warning: Unreachable pattern
17+
┌─ /src/warning/wrn.gleam:5:5
18+
19+
5 │ <<0b11:2>> -> 2
20+
^^^^^^^^^^
21+
22+
This pattern cannot be reached as a previous pattern matches the same
23+
values.
24+
25+
Hint: It can be safely removed.

0 commit comments

Comments
 (0)