diff --git a/corelib/src/byte_array.cairo b/corelib/src/byte_array.cairo index 6e906883362..5ec0bed4f3a 100644 --- a/corelib/src/byte_array.cairo +++ b/corelib/src/byte_array.cairo @@ -42,12 +42,11 @@ //! assert!(first_byte == 0x41); //! ``` -use crate::array::{ArrayTrait, Span, SpanTrait}; -use crate::bytes_31::split_bytes31; +use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait}; #[allow(unused_imports)] use crate::bytes_31::{ BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31, - one_shift_left_bytes_u128, split_u128, u8_at_u256, + one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256, }; use crate::clone::Clone; use crate::cmp::min; @@ -706,7 +705,7 @@ pub(crate) impl ByteArrayIndexView of crate::traits::IndexView { type Item = u8; + fn next(ref self: ByteArrayIter) -> Option { self.ba.at(self.current_index.next()?) } @@ -973,6 +973,84 @@ impl ByteSpanToByteSpan of ToByteSpanTrait { } } +/// An iterator struct over a ByteSpan. +#[derive(Drop, Clone)] +pub struct ByteSpanIter { + /// Iterator over the full words. + data_iter: SpanIter, + /// The word currently being iterated over. + current_word: ShortString, + /// The last, partial word of the ByteSpan, iterated over after all full words are consumed. + remainder: ShortString, +} + +impl ByteSpanIterator of crate::iter::Iterator { + type Item = u8; + + fn next(ref self: ByteSpanIter) -> Option { + if let Some(byte) = self.current_word.pop_first() { + return Some(byte); + } + + // Current word exhausted, try loading the next into current word from data or remainder. + match self.data_iter.next() { + Some(word) => { self.current_word = ShortString { data: (*word).into(), len: 31 }; }, + // No more words in data, try loading the remainder. + None => { + self.current_word = self.remainder; + self.remainder.len = 0; // Mark remainder as consumed. 
+ }, + } + + self.current_word.pop_first() + } +} + +impl ByteSpanIntoIterator of crate::iter::IntoIterator { + type IntoIter = ByteSpanIter; + + /// Creates an iterator over the bytes in the `ByteSpan`. + fn into_iter(self: ByteSpan) -> Self::IntoIter { + let mut data_iter = self.data.into_iter(); + + // Get first word in data array if exists, otherwise iterate on the remainder word. + let Some(first_word) = data_iter.next() else { + // On empty data span, remainder length is larger than or equals to the start offset. + let len = + match helpers::length_sub_offset( + upcast(self.remainder_len), self.first_char_start_offset, + ) { + Some(len) => len, + // Can't actually happen, as start offset is at most the remainder length. + None => 0, + }; + + return ByteSpanIter { + data_iter, + current_word: ShortString { data: self.remainder_word, len }, + remainder: Default::default(), + }; + }; + + // TODO(orizi): Use `complement_to_31` when added. + let len = + match helpers::length_sub_offset( + upcast(BYTES_IN_BYTES31), self.first_char_start_offset, + ) { + Some(len) => len, + // Can't actually happen, as start offset is at most 30. + None => 0, + }; + + ByteSpanIter { + data_iter, + current_word: ShortString { data: (*first_word).into(), len }, + remainder: ShortString { data: self.remainder_word, len: upcast(self.remainder_len) }, + } + } +} + + /// Shifts a word right by `n_bytes`. /// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve /// performance. @@ -987,6 +1065,36 @@ fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 { let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes); after_shift_right } +/// Representation of a `felt252` holding a string up to size 31, including length. +#[derive(Drop, Copy)] +struct ShortString { + /// The actual data. + data: felt252, + /// The actual length of the short string in bytes. 
+ len: BoundedInt<0, 31>, +} + +#[generate_trait] +impl ShortStringImpl of ShortStringTrait { + /// Removes and returns the first byte from the string if it exists. + fn pop_first(ref self: ShortString) -> Option { + let Some(byte_position) = helpers::short_string_byte_count_dec(self.len) else { + return None; + }; + + // Strings are indexed by lsb, so the first char is at position (byte_count - 1). + let byte = u8_at_u256(self.data.into(), upcast(byte_position)); + + self.len = byte_position; + Some(byte) + } +} + +impl ShortStringDefault of Default { + fn default() -> ShortString { + ShortString { data: 0, len: 0 } + } +} mod helpers { use core::num::traits::Bounded; @@ -1110,6 +1218,36 @@ mod helpers { } } + pub impl TrimMinShortStringCount of bounded_int::TrimMinHelper> { + type Target = BoundedInt<1, 31>; + } + + /// Decrements the ShortString byte count by one, or returns `None` if the count is zero. + pub fn short_string_byte_count_dec(count: BoundedInt<0, 31>) -> Option> { + if let crate::internal::OptionRev::Some(trimmed) = bounded_int::trim_min(count) { + Some(upcast(length_minus_one(trimmed))) + } else { + None + } + } + + impl B31SubOffset of SubHelper, Bytes31Index> { + type Result = BoundedInt<-30, 31>; + } + + impl ConstrainB31SubOffsetPos of ConstrainHelper { + type LowT = BoundedInt<-30, -1>; + type HighT = BoundedInt<0, 31>; + } + + /// Subtracts `offset` from `length`, assumes `offset < length`. + pub fn length_sub_offset( + length: BoundedInt<0, 31>, offset: Bytes31Index, + ) -> Option> { + let diff = bounded_int::sub(length, offset); + bounded_int::constrain::<_, 0>(diff).err() + } + /// The information about the new pending word length and the split index. pub enum AppendWordInfo { /// The new pending word length is less than 31, and fits in the current pending word. 
diff --git a/corelib/src/test/byte_array_test.cairo b/corelib/src/test/byte_array_test.cairo
index e05566cb737..f141cc37ce7 100644
--- a/corelib/src/test/byte_array_test.cairo
+++ b/corelib/src/test/byte_array_test.cairo
@@ -1,7 +1,7 @@
 #[feature("byte-span")]
-use core::byte_array::{ByteSpan, ByteSpanTrait, ToByteSpanTrait};
-use core::num::traits::Bounded;
-use core::test::test_utils::{assert_eq, assert_ne};
+use crate::byte_array::{ByteSpan, ByteSpanTrait, ToByteSpanTrait};
+use crate::num::traits::Bounded;
+use crate::test::test_utils::{assert_eq, assert_ne};
 
 #[test]
 fn test_append_byte() {
@@ -731,10 +731,148 @@ fn test_span_at_overflows() {
     // Test overflow protection with large indices.
     let ba: ByteArray = "test";
     let span = ba.span();
-
     assert_eq!(span.get(Bounded::<usize>::MAX), None);
 
     let sliced = ba.span().get(1..3).unwrap();
     assert_eq!(sliced.get(Bounded::<usize>::MAX - 1), None);
     assert_eq!(sliced.get(Bounded::<usize>::MAX), None);
 }
+
+#[test]
+fn test_byte_span_simple() {
+    let empty: ByteArray = "";
+    assert_eq!(empty.span().into_iter().collect(), array![]);
+
+    let ba: ByteArray = "A";
+    assert_eq!(ba.span().into_iter().collect(), array!['A']);
+
+    let ba: ByteArray = "AB";
+    assert_eq!(ba.span().into_iter().collect(), array!['A', 'B']);
+}
+
+#[test]
+fn test_byte_span_iterator_word_boundaries() {
+    // Test 30, 31, 32 bytes (1 word boundary).
+    let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
+    let mut iter = ba_30.span().into_iter();
+    iter.advance_by(29).unwrap();
+    assert_eq!(iter.collect(), array!['d'], "30 bytes - last byte");
+
+    let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
+    let mut iter = ba_31.span().into_iter();
+    assert_eq!(iter.next(), Some('A'));
+    iter.advance_by(29).unwrap();
+    assert_eq!(iter.collect(), array!['e'], "31 bytes - last byte");
+
+    let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
+    let mut iter = ba_32.span().into_iter();
+    iter.advance_by(30).unwrap();
+    assert_eq!(iter.collect(), array!['e', 'f'], "32 bytes - last two bytes");
+
+    // Test 62, 63, 64 bytes (2 word boundaries).
+    let ba_62: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    let mut iter = ba_62.span().into_iter();
+    iter.advance_by(61).unwrap();
+    assert_eq!(iter.collect(), array!['9'], "62 bytes - last byte");
+
+    let ba_63: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!";
+    let mut iter = ba_63.span().into_iter();
+    iter.advance_by(62).unwrap();
+    assert_eq!(iter.collect(), array!['!'], "63 bytes - last byte");
+
+    let ba_64: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@";
+    let mut iter = ba_64.span().into_iter();
+    iter.advance_by(62).unwrap();
+    assert_eq!(iter.collect(), array!['!', '@'], "64 bytes - last two bytes");
+}
+
+#[test]
+fn test_byte_span_iterator_multiple_words() {
+    // Test with 3+ words to verify iteration works across multiple word boundaries.
+    // 92 bytes: 31 + 31 + 30.
+    let ba_92: ByteArray =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':,.<>?/~`";
+    let span = ba_92.span();
+    assert_eq!(span.into_iter().count(), 92, "should iterate all 92 bytes");
+
+    // Verify correctness at specific positions.
+    let mut iter = span.into_iter();
+    assert_eq!(iter.next(), Some('A'));
+
+    // Skip to last byte.
+    iter.advance_by(90).unwrap();
+    assert_eq!(iter.collect(), array!['`'], "92 bytes - last byte");
+}
+
+#[test]
+fn test_byte_span_iterator_for_loop_collect() {
+    let small_ba: ByteArray = "Hello";
+    let span = small_ba.span();
+
+    let mut collected = Default::default();
+    let mut count = 0;
+    for byte in span {
+        collected.append_byte(byte);
+        count += 1;
+    }
+    assert_eq!(collected, small_ba);
+    assert_eq!(count, 5);
+    assert_eq!(span.into_iter().collect(), small_ba);
+
+    // Test with 2 words.
+    let ba_40: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn";
+    collected = Default::default();
+    count = 0;
+    for byte in ba_40.span() {
+        collected.append_byte(byte);
+        count += 1;
+    }
+    assert_eq!(collected, ba_40);
+    assert_eq!(count, 40);
+    assert_eq!(ba_40.span().into_iter().collect(), ba_40);
+
+    // Test with 3 words.
+    let ba_70: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*";
+    collected = Default::default();
+    count = 0;
+    for byte in ba_70.span() {
+        collected.append_byte(byte);
+        count += 1;
+    }
+    assert_eq!(collected, ba_70);
+    assert_eq!(count, 70);
+    assert_eq!(ba_70.span().into_iter().collect(), ba_70);
+}
+
+#[test]
+fn test_byte_span_iterator_slices() {
+    // Slice within remainder word (< 31 bytes).
+    let ba_13: ByteArray = "Hello Shmello";
+    let span = ba_13.span().get(2..7).unwrap();
+    assert_eq!(
+        span.into_iter().collect(), array!['l', 'l', 'o', ' ', 'S'], "slice within remainder word",
+    );
+
+    // Iterate slice across 2 words (1 data + remainder).
+    let ba_33: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
+    let span = ba_33.span().get(27..32).unwrap();
+    assert_eq!(span.into_iter().collect(), array!['b', 'c', 'd', 'e', 'f'], "slice across 2 words");
+
+    // Iterate slice across 3 words.
+    let ba_66: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$";
+    let span = ba_66.span().get(29..64).unwrap();
+    assert_eq!(
+        span.into_iter().collect(),
+        array![
+            'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+            'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!',
+            '@',
+        ],
+        "slice across 3 words",
+    );
+
+    // Test case where data array is exhausted and remainder has exactly 1 byte.
+    let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
+    let span = ba_32.span().get(31..32).unwrap();
+    assert_eq!(span.into_iter().collect(), array!['f'], "1 byte in remainder after data exhausted");
+}