@@ -81,15 +81,16 @@ use crate::{
8181 is_prelude_module, string,
8282 } ,
8383} ;
84- use bitvec:: { order:: Msb0 , slice:: BitSlice , view:: BitView } ;
84+ use bitvec:: { order:: Msb0 , slice:: BitSlice , vec :: BitVec , view:: BitView } ;
8585use ecow:: EcoString ;
8686use id_arena:: { Arena , Id } ;
8787use itertools:: Itertools ;
88- use num_bigint:: BigInt ;
88+ use num_bigint:: { BigInt , Sign } ;
8989use num_traits:: ToPrimitive ;
9090use radix_trie:: { Trie , TrieCommon } ;
9191use std:: {
9292 cell:: RefCell ,
93+ cmp:: Ordering ,
9394 collections:: { HashMap , HashSet , VecDeque } ,
9495 hash:: Hash ,
9596 sync:: Arc ,
@@ -1212,6 +1213,11 @@ pub enum BitArrayMatchedValue {
12121213 /// is deemed unreachable: it is the location this literal value comes
12131214 /// from in the whole pattern.
12141215 location : SrcSpan ,
1216+ /// The bits representing the given literal integer, with the correct
1217+ /// signed- and endianness as specified in the bit array segment.
1218+ /// Present iff the segment has a constant size that is not greater than
1219+ /// [BitArrayMatchedValue::MAX_BITS_INTERFERENCE]
1220+ bits : Option < BitVec < u8 , Msb0 > > ,
12151221 } ,
12161222 LiteralString {
12171223 value : EcoString ,
@@ -1229,6 +1235,12 @@ pub enum BitArrayMatchedValue {
12291235}
12301236
12311237impl BitArrayMatchedValue {
1238+ /// This is an arbitrary limit beyond which interference _may_ no longer be done.
1239+ /// This is necessary because people may write very silly segments like
1240+ /// `<<0:9_007_199_254_740_992>>`, which would allocate around a wee petabyte of memory.
1241+ /// Literal strings are already in memory, and thus ignore this limit.
1242+ const MAX_BITS_INTERFERENCE : usize = u16:: MAX as usize ;
1243+
12321244 pub ( crate ) fn is_literal ( & self ) -> bool {
12331245 match self {
12341246 BitArrayMatchedValue :: LiteralFloat ( _)
@@ -1247,12 +1259,11 @@ impl BitArrayMatchedValue {
12471259 match self {
12481260 BitArrayMatchedValue :: LiteralString { bytes, .. } => Some ( bytes. view_bits :: < Msb0 > ( ) ) ,
12491261 BitArrayMatchedValue :: Assign { value, .. } => value. constant_bits ( ) ,
1262+ BitArrayMatchedValue :: LiteralInt { bits, .. } => bits. as_deref ( ) ,
12501263
12511264 // TODO: We could also implement the interfering optimisation for
1252- // literal ints as well, but that will be a bit trickier than
1253- // strings.
1254- BitArrayMatchedValue :: LiteralInt { .. }
1255- | BitArrayMatchedValue :: LiteralFloat ( _)
1265+ // literal floats as well, but the usefulness is questionable
1266+ BitArrayMatchedValue :: LiteralFloat ( _)
12561267 | BitArrayMatchedValue :: Variable ( _)
12571268 | BitArrayMatchedValue :: Discard ( _) => None ,
12581269 }
@@ -1267,7 +1278,9 @@ impl BitArrayMatchedValue {
12671278 ) -> Option < ImpossibleBitArraySegmentPattern > {
12681279 match self {
12691280 BitArrayMatchedValue :: Assign { value, .. } => value. is_impossible_segment ( read_action) ,
1270- BitArrayMatchedValue :: LiteralInt { value, location } => {
1281+ BitArrayMatchedValue :: LiteralInt {
1282+ value, location, ..
1283+ } => {
12711284 let size = read_action. size . constant_bits ( ) ?. to_u32 ( ) ?;
12721285 if representable_with_bits ( value. clone ( ) , size, read_action. signed ) {
12731286 None
@@ -3521,6 +3534,7 @@ fn segment_matched_value(
35213534 } => BitArrayMatchedValue :: LiteralInt {
35223535 value : int_value. clone ( ) ,
35233536 location : * location,
3537+ bits : int_to_bits ( int_value, & read_action. size , read_action. endianness ) ,
35243538 } ,
35253539 ast:: Pattern :: Float { value, .. } => BitArrayMatchedValue :: LiteralFloat ( value. clone ( ) ) ,
35263540 ast:: Pattern :: String { value, .. } if segment. has_utf16_option ( ) => {
@@ -3558,6 +3572,80 @@ fn segment_matched_value(
35583572 }
35593573}
35603574
3575+ /// Convert a BigInt to its bitarray representation.
3576+ /// Returns `None` if the read size is not constant, or if the size is larger than
3577+ /// [BitArrayMatchedValue::MAX_BITS_INTERFERENCE]
3578+ fn int_to_bits (
3579+ value : & BigInt ,
3580+ read_size : & ReadSize ,
3581+ endianness : Endianness ,
3582+ ) -> Option < BitVec < u8 , Msb0 > > {
3583+ let size = read_size. constant_bits ( ) ?. to_usize ( ) ?;
3584+ if size == 0 || size > BitArrayMatchedValue :: MAX_BITS_INTERFERENCE {
3585+ return None ;
3586+ }
3587+ // Pad negative numbers with 1s (true) and non-negative numbers with 0s (false)
3588+ let pad_digit = value. sign ( ) == Sign :: Minus ;
3589+
3590+ // There are 3 cases, which are easier to handle separately by endianness
3591+ // If the size of the bigint bytes equals the expected bits, we can return them as-is
3592+ // If there are more bits than we need, we need to trim some of the most significant bits.
3593+ // E.g. `6:3` yields one byte of which we need to trim the 5 most significant bits.
3594+ // If there are fewer bits than we need, we need to add some.
3595+ // E.g. `6:13` yields one byte which we need to pad with 5 bits
3596+ let bits = match endianness {
3597+ // Big endian is easier to work with as we can simply trim or pad the front
3598+ Endianness :: Big => {
3599+ let bytes = value. to_signed_bytes_be ( ) ;
3600+ let bytes_size = bytes. len ( ) * 8 ;
3601+ match bytes_size. cmp ( & size) {
3602+ Ordering :: Greater => BitVec :: from_bitslice ( & bytes. view_bits ( ) [ bytes_size - size..] ) ,
3603+ Ordering :: Less => {
3604+ let mut bits = BitVec :: repeat ( pad_digit, size - bytes_size) ;
3605+ bits. extend_from_raw_slice ( & bytes) ;
3606+ bits
3607+ }
3608+ Ordering :: Equal => BitVec :: from_vec ( bytes) ,
3609+ }
3610+ }
3611+ Endianness :: Little => {
3612+ let mut bytes = value. to_signed_bytes_le ( ) ;
3613+ let bytes_size = bytes. len ( ) * 8 ;
3614+ match bytes_size. cmp ( & size) {
3615+ Ordering :: Greater => {
3616+ // First truncate the trailing most significant *bytes*
3617+ // E.g. truncate 0xffffff:10 to two bytes (0xffff)
3618+ let required_bytes = size. div_ceil ( 8 ) ;
3619+ bytes. truncate ( required_bytes) ;
3620+
3621+ let remainder = size % 8 ;
3622+ if remainder == 0 {
3623+ BitVec :: from_vec ( bytes)
3624+ } else {
3625+ // If the size is not a multiple of 8, we need to truncate the most significant bits
3626+ // As they are in the last byte, we leftshift by the appropriate amount and
3627+ // truncate the final bits after conversion
3628+ let last_byte = bytes. last_mut ( ) . expect ( "bytes must not be empty" ) ;
3629+ * last_byte <<= 8 - remainder;
3630+
3631+ let mut bits = BitVec :: from_vec ( bytes) ;
3632+ bits. truncate ( size) ;
3633+ bits
3634+ }
3635+ }
3636+ Ordering :: Less => {
3637+ let mut bits = BitVec :: from_vec ( bytes) ;
3638+ let padding: BitVec < u8 , Msb0 > = BitVec :: repeat ( pad_digit, size - bytes_size) ;
3639+ bits. extend_from_bitslice ( padding. as_bitslice ( ) ) ;
3640+ bits
3641+ }
3642+ Ordering :: Equal => BitVec :: from_vec ( bytes) ,
3643+ }
3644+ }
3645+ } ;
3646+ Some ( bits)
3647+ }
3648+
35613649fn segment_size (
35623650 segment : & TypedPatternBitArraySegment ,
35633651 pattern_variables : & HashMap < EcoString , ReadAction > ,
@@ -3727,3 +3815,100 @@ mod representable_with_bits_test {
37273815 assert ! ( !representable_with_bits( BigInt :: from( -9 ) , 3 , true ) ) ;
37283816 }
37293817}
3818+
#[cfg(test)]
mod int_to_bits_test {
    use std::assert_eq;

    use crate::{
        ast::Endianness,
        exhaustiveness::{BitArrayMatchedValue, ReadSize, int_to_bits},
    };
    use bitvec::{bitvec, order::Msb0, vec::BitVec};
    use num_bigint::BigInt;

    /// A read size of exactly `size` constant bits.
    fn read_size(size: usize) -> ReadSize {
        ReadSize::ConstantBits(BigInt::from(size))
    }

    /// Runs `int_to_bits` for `value` in a segment of `size` bits with the
    /// given endianness.
    fn encode(value: i64, size: usize, endianness: Endianness) -> Option<BitVec<u8, Msb0>> {
        int_to_bits(&BigInt::from(value), &read_size(size), endianness)
    }

    #[test]
    fn int_to_bits_size_too_big() {
        // One bit over the limit must not produce any bits at all.
        let too_big = BitArrayMatchedValue::MAX_BITS_INTERFERENCE + 1;
        assert_eq!(encode(0, too_big, Endianness::Big), None);
    }

    #[test]
    fn int_to_bits_zero() {
        // Zero is all 0s no matter the size or the endianness.
        for size in [3, 10] {
            for endianness in [Endianness::Big, Endianness::Little] {
                assert_eq!(
                    encode(0, size, endianness),
                    Some(BitVec::<u8, Msb0>::repeat(false, size))
                );
            }
        }
    }

    #[test]
    fn int_to_bits_positive() {
        // The value is exactly as wide as the segment
        let big = encode(0xff00, 16, Endianness::Big);
        assert_eq!(big, Some(BitVec::<u8, Msb0>::from_vec(vec![0xff, 0x00])));
        let little = encode(0xff00, 16, Endianness::Little);
        assert_eq!(little, Some(BitVec::<u8, Msb0>::from_vec(vec![0x00, 0xff])));

        // The value is wider than the segment
        let big = encode(0xff00ff, 12, Endianness::Big);
        assert_eq!(
            big,
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])
        );
        let little = encode(0xff00ff, 12, Endianness::Little);
        assert_eq!(
            little,
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0])
        );

        // The value is narrower than the segment
        let big = encode(0xff, 12, Endianness::Big);
        assert_eq!(
            big,
            Some(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])
        );
        let little = encode(0xff, 12, Endianness::Little);
        assert_eq!(
            little,
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0])
        );
    }

    #[test]
    fn int_to_bits_negative() {
        // The value is narrower than the segment, so it is padded with 1s
        let big = encode(-128, 12, Endianness::Big);
        assert_eq!(
            big,
            Some(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
        );
        let little = encode(-128, 12, Endianness::Little);
        assert_eq!(
            little,
            Some(bitvec![u8, Msb0; 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1])
        );
    }
}
0 commit comments