@@ -81,7 +81,7 @@ use crate::{
8181 is_prelude_module, string,
8282 } ,
8383} ;
84- use bitvec:: { order:: Msb0 , slice:: BitSlice , view:: BitView } ;
84+ use bitvec:: { order:: Msb0 , slice:: BitSlice , vec :: BitVec , view:: BitView } ;
8585use ecow:: EcoString ;
8686use id_arena:: { Arena , Id } ;
8787use itertools:: Itertools ;
@@ -90,6 +90,7 @@ use num_traits::ToPrimitive;
9090use radix_trie:: { Trie , TrieCommon } ;
9191use std:: {
9292 cell:: RefCell ,
93+ cmp:: Ordering ,
9394 collections:: { HashMap , HashSet , VecDeque } ,
9495 hash:: Hash ,
9596 sync:: Arc ,
@@ -1212,6 +1213,9 @@ pub enum BitArrayMatchedValue {
12121213 /// is deemed unreachable: it is the location this literal value comes
12131214 /// from in the whole pattern.
12141215 location : SrcSpan ,
1216+ /// The bits representing the given literal integer, with the correct
1217+ /// signed- and endianness as specified in the bit array segment.
1218+ bits : Result < BitVec < u8 , Msb0 > , IntToBitsError > ,
12151219 } ,
12161220 LiteralString {
12171221 value : EcoString ,
@@ -1229,6 +1233,12 @@ pub enum BitArrayMatchedValue {
12291233}
12301234
12311235impl BitArrayMatchedValue {
1236+ /// This is an arbitrary limit beyond which interference _may_ no longer be done.
1237+ /// This is necessary because people may write very silly segments like
1238+ /// `<<0:9_007_199_254_740_992>>`, which would allocate around a wee petabyte of memory.
1239+ /// Literal strings are already in memory, and thus ignore this limit.
1240+ const MAX_BITS_INTERFERENCE : u32 = u16:: MAX as u32 ;
1241+
12321242 pub ( crate ) fn is_literal ( & self ) -> bool {
12331243 match self {
12341244 BitArrayMatchedValue :: LiteralFloat ( _)
@@ -1247,12 +1257,11 @@ impl BitArrayMatchedValue {
12471257 match self {
12481258 BitArrayMatchedValue :: LiteralString { bytes, .. } => Some ( bytes. view_bits :: < Msb0 > ( ) ) ,
12491259 BitArrayMatchedValue :: Assign { value, .. } => value. constant_bits ( ) ,
1260+ BitArrayMatchedValue :: LiteralInt { bits, .. } => bits. as_deref ( ) . ok ( ) ,
12501261
12511262 // TODO: We could also implement the interfering optimisation for
1252- // literal ints as well, but that will be a bit trickier than
1253- // strings.
1254- BitArrayMatchedValue :: LiteralInt { .. }
1255- | BitArrayMatchedValue :: LiteralFloat ( _)
1263+ // literal floats as well, but the usefulness is questionable
1264+ BitArrayMatchedValue :: LiteralFloat ( _)
12561265 | BitArrayMatchedValue :: Variable ( _)
12571266 | BitArrayMatchedValue :: Discard ( _) => None ,
12581267 }
@@ -1267,19 +1276,21 @@ impl BitArrayMatchedValue {
12671276 ) -> Option < ImpossibleBitArraySegmentPattern > {
12681277 match self {
12691278 BitArrayMatchedValue :: Assign { value, .. } => value. is_impossible_segment ( read_action) ,
1270- BitArrayMatchedValue :: LiteralInt { value, location } => {
1271- let size = read_action. size . constant_bits ( ) ?. to_u32 ( ) ?;
1272- if representable_with_bits ( value, size, read_action. signed ) {
1273- None
1274- } else {
1279+ BitArrayMatchedValue :: LiteralInt {
1280+ value,
1281+ location,
1282+ bits,
1283+ } => match bits {
1284+ Err ( IntToBitsError :: Unrepresentable { size } ) => {
12751285 Some ( ImpossibleBitArraySegmentPattern :: UnrepresentableInteger {
12761286 value : value. clone ( ) ,
1277- size,
1287+ size : * size ,
12781288 location : * location,
12791289 signed : read_action. signed ,
12801290 } )
12811291 }
1282- }
1292+ _ => None ,
1293+ } ,
12831294
12841295 BitArrayMatchedValue :: LiteralFloat ( _)
12851296 | BitArrayMatchedValue :: LiteralString { .. }
@@ -3521,6 +3532,12 @@ fn segment_matched_value(
35213532 } => BitArrayMatchedValue :: LiteralInt {
35223533 value : int_value. clone ( ) ,
35233534 location : * location,
3535+ bits : int_to_bits (
3536+ int_value,
3537+ & read_action. size ,
3538+ read_action. endianness ,
3539+ read_action. signed ,
3540+ ) ,
35243541 } ,
35253542 ast:: Pattern :: Float { value, .. } => BitArrayMatchedValue :: LiteralFloat ( value. clone ( ) ) ,
35263543 ast:: Pattern :: String { value, .. } if segment. has_utf16_option ( ) => {
@@ -3558,6 +3575,92 @@ fn segment_matched_value(
35583575 }
35593576}
35603577
3578+ fn int_to_bits (
3579+ value : & BigInt ,
3580+ read_size : & ReadSize ,
3581+ endianness : Endianness ,
3582+ signed : bool ,
3583+ ) -> Result < BitVec < u8 , Msb0 > , IntToBitsError > {
3584+ let size = read_size
3585+ . constant_bits ( )
3586+ . ok_or ( IntToBitsError :: NonConstantSize ) ?
3587+ . to_u32 ( )
3588+ . ok_or ( IntToBitsError :: ExceedsMaximumSize ) ?;
3589+
3590+ if !representable_with_bits ( value, size, signed) {
3591+ return Err ( IntToBitsError :: Unrepresentable { size } ) ;
3592+ } else if size > BitArrayMatchedValue :: MAX_BITS_INTERFERENCE {
3593+ return Err ( IntToBitsError :: ExceedsMaximumSize ) ;
3594+ }
3595+
3596+ // Pad negative numbers with 1s (true) and non-negative numbers with 0s (false)
3597+ let pad_digit = value. sign ( ) == Sign :: Minus ;
3598+ let size = size as usize ;
3599+ let mut bytes = int_to_bytes ( value, endianness, signed) ;
3600+ let bytes_size = bytes. len ( ) * 8 ;
3601+
3602+ // There are 3 cases, which are easier to handle separately by endianness
3603+ // If the size of the bigint bytes equals the expected bits, we can return them as-is
3604+ // If there are more bits than we need, we need to trim some of the most significant bits.
3605+ // E.g. `6:3` yields one byte of which we need to trim the 5 most significant bits.
3606+ // Values like 999:3 are illegal and caught by the guard at the start of the function.
3607+ // If there are fewer bits than we need, we need to add some.
3608+ // E.g. `6:13` yields one byte which we need to pad with 5 bits
3609+ let bits = match ( endianness, bytes_size. cmp ( & size) ) {
3610+ ( _, Ordering :: Equal ) => BitVec :: from_vec ( bytes) ,
3611+
3612+ ( Endianness :: Big , Ordering :: Greater ) => {
3613+ BitVec :: from_bitslice ( & bytes. view_bits ( ) [ bytes_size - size..] )
3614+ }
3615+ ( Endianness :: Big , Ordering :: Less ) => {
3616+ let mut bits = BitVec :: repeat ( pad_digit, size - bytes_size) ;
3617+ bits. extend_from_raw_slice ( & bytes) ;
3618+ bits
3619+ }
3620+
3621+ ( Endianness :: Little , Ordering :: Greater ) => {
3622+ // If the difference is greater than a byte, we returned an Error earlier
3623+ let remainder = size % 8 ;
3624+ if remainder == 0 {
3625+ BitVec :: from_vec ( bytes)
3626+ } else {
3627+ // If the size is not a multiple of 8, we need to truncate the most significant bits.
3628+ // As they are in the last byte, we leftshift by the appropriate amount and
3629+ // truncate the final bits after conversion
3630+ let last_byte = bytes. last_mut ( ) . expect ( "bytes must not be empty" ) ;
3631+ * last_byte <<= 8 - remainder;
3632+
3633+ let mut bits = BitVec :: from_vec ( bytes) ;
3634+ bits. truncate ( size) ;
3635+ bits
3636+ }
3637+ }
3638+ ( Endianness :: Little , Ordering :: Less ) => {
3639+ let mut bits = BitVec :: from_vec ( bytes) ;
3640+ let padding: BitVec < u8 , Msb0 > = BitVec :: repeat ( pad_digit, size - bytes_size) ;
3641+ bits. extend_from_bitslice ( padding. as_bitslice ( ) ) ;
3642+ bits
3643+ }
3644+ } ;
3645+ Ok ( bits)
3646+ }
3647+
3648+ fn int_to_bytes ( value : & BigInt , endianness : Endianness , signed : bool ) -> Vec < u8 > {
3649+ match ( endianness, signed) {
3650+ ( Endianness :: Big , false ) => value. to_bytes_be ( ) . 1 ,
3651+ ( Endianness :: Big , true ) => value. to_signed_bytes_be ( ) ,
3652+ ( Endianness :: Little , false ) => value. to_bytes_le ( ) . 1 ,
3653+ ( Endianness :: Little , true ) => value. to_signed_bytes_le ( ) ,
3654+ }
3655+ }
3656+
3657+ #[ derive( Clone , Copy , Eq , PartialEq , Debug , serde:: Serialize , serde:: Deserialize ) ]
3658+ pub enum IntToBitsError {
3659+ Unrepresentable { size : u32 } ,
3660+ ExceedsMaximumSize ,
3661+ NonConstantSize ,
3662+ }
3663+
35613664fn segment_size (
35623665 segment : & TypedPatternBitArraySegment ,
35633666 pattern_variables : & HashMap < EcoString , ReadAction > ,
@@ -3762,3 +3865,129 @@ mod representable_with_bits_test {
37623865 assert ! ( representable_with_bits( & BigInt :: from( -9 ) , 5 , true ) ) ;
37633866 }
37643867}
3868+
#[cfg(test)]
mod int_to_bits_test {
    use std::assert_eq;

    use crate::{
        ast::Endianness,
        exhaustiveness::{BitArrayMatchedValue, IntToBitsError, ReadSize, int_to_bits},
    };
    use bitvec::{bitvec, order::Msb0, vec::BitVec};
    use num_bigint::BigInt;

    /// The result type of `int_to_bits`, spelled out once for the tests.
    type Bits = Result<BitVec<u8, Msb0>, IntToBitsError>;

    /// Runs `int_to_bits` for `value` in a segment with a constant size of
    /// `size` bits.
    fn convert(value: i64, size: u32, endianness: Endianness, signed: bool) -> Bits {
        let read_size = ReadSize::ConstantBits(BigInt::from(size));
        int_to_bits(&BigInt::from(value), &read_size, endianness, signed)
    }

    #[test]
    fn int_to_bits_size_too_big() {
        let too_big = BitArrayMatchedValue::MAX_BITS_INTERFERENCE + 1;
        assert_eq!(
            convert(0, too_big, Endianness::Big, true),
            Err(IntToBitsError::ExceedsMaximumSize),
        );
    }

    #[test]
    fn int_to_bits_zero() {
        // Zero is all zeroes regardless of the byte order, both for sizes
        // below a byte and for sizes spanning more than one byte.
        for size in [3_u32, 10] {
            let zeroes: Bits = Ok(BitVec::repeat(false, size as usize));
            for endianness in [Endianness::Big, Endianness::Little] {
                assert_eq!(convert(0, size, endianness, false), zeroes);
            }
        }
    }

    #[test]
    fn int_to_bits_positive() {
        // Exact match
        assert_eq!(
            convert(0xff00, 16, Endianness::Big, false),
            Ok(BitVec::<u8, Msb0>::from_vec(vec![0xff, 0x00])),
        );
        assert_eq!(
            convert(0xff00, 16, Endianness::Little, false),
            Ok(BitVec::<u8, Msb0>::from_vec(vec![0x00, 0xff])),
        );

        // More bits in the bytes than in the segment
        assert_eq!(
            convert(0b11_1111_0000, 10, Endianness::Big, false),
            Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
        );
        assert_eq!(
            convert(0b11_1111_0000, 10, Endianness::Little, false),
            Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 0, 0, 0, 0, 1, 1]),
        );

        // Too few bits in int
        assert_eq!(
            convert(0xff, 12, Endianness::Big, false),
            Ok(bitvec![u8, Msb0; 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]),
        );
        assert_eq!(
            convert(0xff, 12, Endianness::Little, false),
            Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]),
        );
    }

    #[test]
    fn int_to_bits_signed() {
        // Negative numbers are padded with ones on the most significant side.
        assert_eq!(
            convert(-128, 12, Endianness::Big, true),
            Ok(bitvec![u8, Msb0; 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]),
        );
        assert_eq!(
            convert(-128, 12, Endianness::Little, true),
            Ok(bitvec![u8, Msb0; 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1]),
        );
    }
}
0 commit comments