Skip to content

Commit 96b6e7e

Browse files
author
Gilad Chase
committed
feat(byte_array): add ByteSpan iterator
Also extract msb->lsb indexing into helper.
1 parent 2280b52 commit 96b6e7e

File tree

4 files changed

+1645
-1324
lines changed

4 files changed

+1645
-1324
lines changed

corelib/src/byte_array.cairo

Lines changed: 114 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,11 @@
4242
//! assert!(first_byte == 0x41);
4343
//! ```
4444

45-
use crate::array::{ArrayTrait, Span, SpanTrait};
46-
use crate::bytes_31::split_bytes31;
45+
use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait};
4746
#[allow(unused_imports)]
4847
use crate::bytes_31::{
4948
BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31,
50-
one_shift_left_bytes_u128, split_u128, u8_at_u256,
49+
one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256,
5150
};
5251
use crate::clone::Clone;
5352
use crate::cmp::min;
@@ -238,8 +237,8 @@ pub impl ByteArrayImpl of ByteArrayTrait {
238237
}
239238
// index_in_word is counted from the MSB; `byte_at_msb` converts it to an index from the LSB.
240239
return Some(
241-
u8_at_u256(
242-
(*self.pending_word).into(), upcast(*self.pending_word_len) - 1 - index_in_word,
240+
helpers::byte_at_msb(
241+
*self.pending_word, upcast(*self.pending_word_len), index_in_word,
243242
),
244243
);
245244
}
@@ -957,6 +956,107 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
957956
}
958957
}
959958

959+
/// An iterator struct over a ByteSpan.
960+
// TODO(giladchase): Use this in byte array into_iter and deprecate ByteArrayIter.
961+
#[derive(Drop, Clone)]
962+
pub struct ByteSpanIter {
963+
data_iter: SpanIter<bytes31>,
964+
current_word: felt252,
965+
current_word_len: usize,
966+
byte_in_word_index: usize,
967+
remainder_word: felt252,
968+
remainder_len: usize,
969+
}
970+
971+
impl ByteSpanIterDefault of Default<ByteSpanIter> {
972+
fn default() -> ByteSpanIter {
973+
ByteSpanIter {
974+
data_iter: [].span().into_iter(),
975+
current_word: Default::default(),
976+
current_word_len: Default::default(),
977+
byte_in_word_index: Default::default(),
978+
remainder_word: Default::default(),
979+
remainder_len: Default::default(),
980+
}
981+
}
982+
}
983+
984+
985+
#[generate_trait]
986+
impl ByteSpanIterImpl of ByteSpanIterTrait {
987+
/// Loads a new word as the current word and returns its first byte if it's not empty,
988+
/// otherwise returns None.
989+
#[inline]
990+
fn load_word_next(ref self: ByteSpanIter, word: felt252, len: usize) -> Option<u8> {
991+
if len == 0 {
992+
return None;
993+
}
994+
995+
self.current_word = word;
996+
self.current_word_len = len;
997+
self.byte_in_word_index = 1;
998+
Some(helpers::byte_at_msb(word, len, 0))
999+
}
1000+
}
1001+
1002+
impl ByteSpanIterator of crate::iter::Iterator<ByteSpanIter> {
1003+
type Item = u8;
1004+
1005+
#[inline]
1006+
fn next(ref self: ByteSpanIter) -> Option<u8> {
1007+
// Return the next byte from the current word if it has bytes left.
1008+
if self.byte_in_word_index < self.current_word_len {
1009+
let byte = helpers::byte_at_msb(
1010+
self.current_word, self.current_word_len, self.byte_in_word_index,
1011+
);
1012+
self.byte_in_word_index += 1;
1013+
return Some(byte);
1014+
}
1015+
1016+
// Current word exhausted, try advancing to the next word from the data iterator.
1017+
if let Some(word) = self.data_iter.next() {
1018+
self.load_word_next((*word).into(), BYTES_IN_BYTES31)
1019+
} else {
1020+
// No more data words, advance into the remainder word if it exists.
1021+
let len = self.remainder_len;
1022+
self.remainder_len = 0; // Mark remainder as consumed.
1023+
self.load_word_next(self.remainder_word, len)
1024+
}
1025+
}
1026+
}
1027+
1028+
impl ByteSpanIntoIterator of crate::iter::IntoIterator<ByteSpan> {
1029+
type IntoIter = ByteSpanIter;
1030+
1031+
/// Creates an iterator over the bytes in the `ByteSpan`.
1032+
#[inline]
1033+
fn into_iter(self: ByteSpan) -> Self::IntoIter {
1034+
let mut data_iter = self.data.into_iter();
1035+
1036+
let Some(first_word) = data_iter.next() else {
1037+
// No data words, start iterating from the remainder word.
1038+
return ByteSpanIter {
1039+
data_iter,
1040+
current_word: self.remainder_word,
1041+
current_word_len: upcast(self.remainder_len),
1042+
byte_in_word_index: upcast(self.first_char_start_offset),
1043+
remainder_word: self.remainder_word,
1044+
remainder_len: 0 // Mark as consumed since we're starting with it.
1045+
};
1046+
};
1047+
1048+
ByteSpanIter {
1049+
data_iter,
1050+
current_word: (*first_word).into(),
1051+
current_word_len: BYTES_IN_BYTES31,
1052+
byte_in_word_index: upcast(self.first_char_start_offset),
1053+
remainder_word: self.remainder_word,
1054+
remainder_len: upcast(self.remainder_len),
1055+
}
1056+
}
1057+
}
1058+
1059+
9601060
/// Shifts a word right by `n_bytes`.
9611061
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
9621062
/// performance.
@@ -1173,5 +1273,14 @@ mod helpers {
11731273
},
11741274
}
11751275
}
1276+
1277+
/// Extracts a byte from a word at the given MSB index.
1278+
/// Converts from MSB indexing (0 = first/leftmost byte) to LSB indexing; requires `msb_index < word_len`.
1279+
#[inline]
1280+
pub fn byte_at_msb(word: felt252, word_len: usize, msb_index: usize) -> u8 {
1281+
// Convert from MSB to LSB indexing: index 0 MSB = index (len-1) LSB.
1282+
let lsb_index = word_len - 1 - msb_index;
1283+
u8_at_u256(word.into(), lsb_index)
1284+
}
11761285
}
11771286
pub(crate) use helpers::len_parts;

corelib/src/test/byte_array_test.cairo

Lines changed: 215 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#[feature("byte-span")]
2-
use crate::byte_array::{ByteSpanTrait, ToByteSpanTrait};
3-
use crate::num::traits::Bounded;
4-
use crate::test::test_utils::{assert_eq, assert_ne};
2+
use core::byte_array::{ByteSpanTrait, ToByteSpanTrait};
3+
use core::num::traits::Bounded;
4+
use core::test::test_utils::{assert_eq, assert_ne};
55

66
#[test]
77
fn test_append_byte() {
@@ -843,10 +843,221 @@ fn test_span_at_overflows() {
843843
// Test overflow protection with large indices.
844844
let ba: ByteArray = "test";
845845
let span = ba.span();
846-
847846
assert_eq!(span.get(Bounded::<usize>::MAX), None);
848847

849848
let sliced = ba.span().get(1..3).unwrap();
850849
assert_eq!(sliced.get(Bounded::<usize>::MAX - 1), None);
851850
assert_eq!(sliced.get(Bounded::<usize>::MAX), None);
852851
}
852+
853+
#[test]
854+
fn test_byte_span_simple() {
855+
let empty: ByteArray = "";
856+
let mut iter = empty.span().into_iter();
857+
assert_eq!(iter.next(), None);
858+
859+
let ba: ByteArray = "A";
860+
let mut iter = ba.span().into_iter();
861+
assert_eq!(iter.next(), Some('A'));
862+
assert_eq!(iter.next(), None);
863+
864+
let ba: ByteArray = "ABC";
865+
let mut iter = ba.span().into_iter();
866+
assert_eq!(iter.next(), Some('A'));
867+
assert_eq!(iter.next(), Some('B'));
868+
assert_eq!(iter.next(), Some('C'));
869+
assert_eq!(iter.next(), None);
870+
assert_eq!(iter.next(), None, "Idempotent empty");
871+
}
872+
873+
#[test]
874+
fn test_byte_span_iterator_word_boundaries() {
875+
// Test 30, 31, 32 bytes (1 word boundary).
876+
let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
877+
let mut iter = ba_30.span().into_iter();
878+
for _ in 0_usize..29 {
879+
let _ = iter.next();
880+
}
881+
assert_eq!(iter.next(), Some('d'), "30 bytes - last byte");
882+
assert_eq!(iter.next(), None);
883+
884+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
885+
let mut iter = ba_31.span().into_iter();
886+
assert_eq!(iter.next(), Some('A'));
887+
for _ in 1_usize..30 {
888+
let _ = iter.next();
889+
}
890+
assert_eq!(iter.next(), Some('e'), "31 bytes - last byte");
891+
assert_eq!(iter.next(), None);
892+
893+
let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
894+
let mut iter = ba_32.span().into_iter();
895+
for _ in 0_usize..30 {
896+
let _ = iter.next();
897+
}
898+
assert_eq!(iter.next(), Some('e'), "32 bytes - byte 30");
899+
assert_eq!(iter.next(), Some('f'), "32 bytes - byte 31");
900+
assert_eq!(iter.next(), None);
901+
902+
// Test 62, 63, 64 bytes (2 word boundaries).
903+
let ba_62: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
904+
let mut iter = ba_62.span().into_iter();
905+
for _ in 0_usize..61 {
906+
let _ = iter.next();
907+
}
908+
assert_eq!(iter.next(), Some('9'), "62 bytes - last byte");
909+
assert_eq!(iter.next(), None);
910+
911+
let ba_63: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!";
912+
let mut iter = ba_63.span().into_iter();
913+
for _ in 0_usize..62 {
914+
let _ = iter.next();
915+
}
916+
assert_eq!(iter.next(), Some('!'), "63 bytes - last byte");
917+
assert_eq!(iter.next(), None);
918+
919+
let ba_64: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@";
920+
let mut iter = ba_64.span().into_iter();
921+
for _ in 0_usize..62 {
922+
let _ = iter.next();
923+
}
924+
assert_eq!(iter.next(), Some('!'), "64 bytes - byte 62");
925+
assert_eq!(iter.next(), Some('@'), "64 bytes - byte 63");
926+
assert_eq!(iter.next(), None);
927+
}
928+
929+
#[test]
930+
fn test_byte_span_iterator_multiple_words() {
931+
// Test with 3+ words to verify iteration works across multiple word boundaries.
932+
// 92 bytes: 31 + 31 + 30.
933+
let ba_92: ByteArray =
934+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':,.<>?/~`";
935+
let span = ba_92.span();
936+
let mut iter = span.into_iter();
937+
// Verify we can iterate through all bytes.
938+
let mut count = 0;
939+
while let Some(_) = iter.next() {
940+
count += 1;
941+
}
942+
assert_eq!(count, 92, "should iterate all 92 bytes");
943+
944+
// Verify correctness at specific positions.
945+
let mut iter = span.into_iter();
946+
assert_eq!(iter.next(), Some('A'));
947+
948+
// Skip to last byte.
949+
for _ in 1_usize..91 {
950+
let _ = iter.next();
951+
}
952+
assert_eq!(iter.next(), Some('`'));
953+
assert_eq!(iter.next(), None);
954+
}
955+
956+
#[test]
957+
fn test_byte_span_iterator_for_loop_collect() {
958+
let small_ba: ByteArray = "Hello";
959+
let span = small_ba.span();
960+
961+
let mut collected = Default::default();
962+
let mut count = 0;
963+
for byte in span {
964+
collected.append_byte(byte);
965+
count += 1;
966+
}
967+
assert_eq!(collected, small_ba);
968+
assert_eq!(count, 5);
969+
assert_eq!(span.into_iter().collect(), small_ba);
970+
971+
// Test with 2 words.
972+
let ba_40: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn";
973+
collected = Default::default();
974+
count = 0;
975+
for byte in ba_40.span() {
976+
collected.append_byte(byte);
977+
count += 1;
978+
}
979+
assert_eq!(collected, ba_40);
980+
assert_eq!(count, 40);
981+
assert_eq!(ba_40.span().into_iter().collect(), ba_40);
982+
983+
// Test with 3 words.
984+
let ba_70: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*";
985+
collected = Default::default();
986+
count = 0;
987+
for byte in ba_70.span() {
988+
collected.append_byte(byte);
989+
count += 1;
990+
}
991+
assert_eq!(collected, ba_70);
992+
assert_eq!(count, 70);
993+
assert_eq!(ba_70.span().into_iter().collect(), ba_70);
994+
}
995+
996+
#[test]
997+
fn test_byte_span_iterator_slices() {
998+
// Slice within remainder word (< 31 bytes).
999+
let ba_13: ByteArray = "Hello Shmello";
1000+
let span = ba_13.span().get(2..7).unwrap();
1001+
1002+
let mut iter = span.into_iter();
1003+
assert_eq!(iter.next(), Some('l'));
1004+
assert_eq!(iter.next(), Some('l'));
1005+
assert_eq!(iter.next(), Some('o'));
1006+
assert_eq!(iter.next(), Some(' '));
1007+
assert_eq!(iter.next(), Some('S'));
1008+
assert_eq!(iter.next(), None);
1009+
1010+
// Iterate slice across 2 words (1 data + remainder).
1011+
let ba_33: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
1012+
let span = ba_33.span().get(27..32).unwrap();
1013+
1014+
let mut iter = span.into_iter();
1015+
assert_eq!(iter.next(), Some('b'));
1016+
assert_eq!(iter.next(), Some('c'));
1017+
assert_eq!(iter.next(), Some('d'));
1018+
assert_eq!(iter.next(), Some('e'));
1019+
assert_eq!(iter.next(), Some('f'));
1020+
assert_eq!(iter.next(), None);
1021+
1022+
// Iterate slice across 3 words.
1023+
let ba_66: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$";
1024+
let span = ba_66.span().get(29..64).unwrap();
1025+
1026+
let mut iter = span.into_iter();
1027+
assert_eq!(iter.next(), Some('d'), "First word");
1028+
assert_eq!(iter.next(), Some('e'));
1029+
assert_eq!(iter.next(), Some('f'), "Second word");
1030+
assert_eq!(iter.next(), Some('g'));
1031+
assert_eq!(iter.next(), Some('h'));
1032+
assert_eq!(iter.next(), Some('i'));
1033+
assert_eq!(iter.next(), Some('j'));
1034+
assert_eq!(iter.next(), Some('k'));
1035+
assert_eq!(iter.next(), Some('l'));
1036+
assert_eq!(iter.next(), Some('m'));
1037+
assert_eq!(iter.next(), Some('n'));
1038+
assert_eq!(iter.next(), Some('o'));
1039+
assert_eq!(iter.next(), Some('p'));
1040+
assert_eq!(iter.next(), Some('q'));
1041+
assert_eq!(iter.next(), Some('r'));
1042+
assert_eq!(iter.next(), Some('s'));
1043+
assert_eq!(iter.next(), Some('t'));
1044+
assert_eq!(iter.next(), Some('u'));
1045+
assert_eq!(iter.next(), Some('v'));
1046+
assert_eq!(iter.next(), Some('w'));
1047+
assert_eq!(iter.next(), Some('x'));
1048+
assert_eq!(iter.next(), Some('y'));
1049+
assert_eq!(iter.next(), Some('z'));
1050+
assert_eq!(iter.next(), Some('0'));
1051+
assert_eq!(iter.next(), Some('1'));
1052+
assert_eq!(iter.next(), Some('2'));
1053+
assert_eq!(iter.next(), Some('3'));
1054+
assert_eq!(iter.next(), Some('4'));
1055+
assert_eq!(iter.next(), Some('5'));
1056+
assert_eq!(iter.next(), Some('6'));
1057+
assert_eq!(iter.next(), Some('7'));
1058+
assert_eq!(iter.next(), Some('8'));
1059+
assert_eq!(iter.next(), Some('9'));
1060+
assert_eq!(iter.next(), Some('!'), "Remainder word");
1061+
assert_eq!(iter.next(), Some('@'));
1062+
assert_eq!(iter.next(), None);
1063+
}

0 commit comments

Comments
 (0)