Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 142 additions & 4 deletions corelib/src/byte_array.cairo
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,11 @@
//! assert!(first_byte == 0x41);
//! ```

use crate::array::{ArrayTrait, Span, SpanTrait};
use crate::bytes_31::split_bytes31;
use crate::array::{ArrayTrait, Span, SpanIter, SpanTrait};
#[allow(unused_imports)]
use crate::bytes_31::{
BYTES_IN_BYTES31, Bytes31Trait, POW_2_128, POW_2_8, U128IntoBytes31, U8IntoBytes31,
one_shift_left_bytes_u128, split_u128, u8_at_u256,
one_shift_left_bytes_u128, split_bytes31, split_u128, u8_at_u256,
};
use crate::clone::Clone;
use crate::cmp::min;
Expand Down Expand Up @@ -706,7 +705,7 @@ pub(crate) impl ByteArrayIndexView of crate::traits::IndexView<ByteArray, usize,
}
}

// TODO: Implement a more efficient version of this iterator.
// TODO(giladchase): Delegate to byte span iterator instead of current at-based implementation.
/// An iterator struct over a ByteArray.
#[derive(Drop, Clone)]
pub struct ByteArrayIter {
Expand All @@ -716,6 +715,7 @@ pub struct ByteArrayIter {

impl ByteArrayIterator of crate::iter::Iterator<ByteArrayIter> {
type Item = u8;

fn next(ref self: ByteArrayIter) -> Option<u8> {
self.ba.at(self.current_index.next()?)
}
Expand Down Expand Up @@ -973,6 +973,84 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
}
}

/// An iterator struct over a ByteSpan.
///
/// Bytes are popped one at a time from `current_word`. When it runs out, the next full word is
/// loaded from `data_iter`; when `data_iter` is exhausted as well, `remainder` is loaded instead.
#[derive(Drop, Clone)]
pub struct ByteSpanIter {
/// Iterator over the full words.
data_iter: SpanIter<bytes31>,
/// The word currently being iterated over.
current_word: ShortString,
/// The last, partial word of the ByteSpan, iterated over after all full words are consumed.
/// Its length is reset to zero once it has been copied into `current_word`.
remainder: ShortString,
}

impl ByteSpanIterator of crate::iter::Iterator<ByteSpanIter> {
    type Item = u8;

    /// Returns the next byte of the span, or `None` once every byte has been yielded.
    fn next(ref self: ByteSpanIter) -> Option<u8> {
        match self.current_word.pop_first() {
            // Fast path: the current word still holds bytes.
            Some(byte) => Some(byte),
            None => {
                // The current word is empty: refill it from the full words, falling back to the
                // remainder word once the full words have run out.
                match self.data_iter.next() {
                    Some(full_word) => {
                        self.current_word = ShortString { data: (*full_word).into(), len: 31 };
                    },
                    None => {
                        self.current_word = self.remainder;
                        // Zero the stored length so the remainder is handed out only once.
                        self.remainder.len = 0;
                    },
                }

                self.current_word.pop_first()
            },
        }
    }
}

impl ByteSpanIntoIterator of crate::iter::IntoIterator<ByteSpan> {
    type IntoIter = ByteSpanIter;

    /// Creates an iterator over the bytes in the `ByteSpan`.
    fn into_iter(self: ByteSpan) -> Self::IntoIter {
        let mut data_iter = self.data.into_iter();

        // Start from the first full word when there is one; otherwise only the remainder word is
        // left to iterate.
        match data_iter.next() {
            Some(first_word) => {
                // The first word is full, so it holds 31 bytes minus the skipped start offset.
                // TODO(orizi): Use `complement_to_31` when added.
                let first_len =
                    match helpers::length_sub_offset(
                        upcast(BYTES_IN_BYTES31), self.first_char_start_offset,
                    ) {
                    Some(len) => len,
                    // Can't actually happen, as start offset is at most 30.
                    None => 0,
                };

                ByteSpanIter {
                    data_iter,
                    current_word: ShortString { data: (*first_word).into(), len: first_len },
                    remainder: ShortString {
                        data: self.remainder_word, len: upcast(self.remainder_len),
                    },
                }
            },
            None => {
                // With an empty data span, the remainder length is greater than or equal to the
                // start offset.
                let available_len =
                    match helpers::length_sub_offset(
                        upcast(self.remainder_len), self.first_char_start_offset,
                    ) {
                    Some(len) => len,
                    // Can't actually happen, as start offset is at most the remainder length.
                    None => 0,
                };

                // The remainder is iterated directly as the current word, so the stored
                // remainder is left empty.
                ByteSpanIter {
                    data_iter,
                    current_word: ShortString { data: self.remainder_word, len: available_len },
                    remainder: Default::default(),
                }
            },
        }
    }
}


/// Shifts a word right by `n_bytes`.
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
/// performance.
Expand All @@ -987,6 +1065,36 @@ fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
after_shift_right
}
/// Representation of a `felt252` holding a string up to size 31, including length.
///
/// `pop_first` reads the byte at index `len - 1` of `data` and then decrements `len`; `data`
/// itself is never modified.
#[derive(Drop, Copy)]
struct ShortString {
/// The actual data.
data: felt252,
/// The actual length of the short string in bytes.
len: BoundedInt<0, 31>,
}

#[generate_trait]
impl ShortStringImpl of ShortStringTrait {
    /// Removes and returns the first byte from the string if it exists.
    ///
    /// Returns `None` when the string is already empty.
    fn pop_first(ref self: ShortString) -> Option<u8> {
        // `None` here means the length was zero, so there is nothing to pop.
        let remaining = helpers::short_string_byte_count_dec(self.len)?;
        self.len = remaining;

        // Strings are indexed by lsb, so the first char is at position (byte_count - 1), which
        // is exactly the decremented length.
        Some(u8_at_u256(self.data.into(), upcast(remaining)))
    }
}

/// The default `ShortString` is empty: zero data and zero length.
impl ShortStringDefault of Default<ShortString> {
fn default() -> ShortString {
ShortString { data: 0, len: 0 }
}
}

mod helpers {
use core::num::traits::Bounded;
Expand Down Expand Up @@ -1110,6 +1218,36 @@ mod helpers {
}
}

/// Helper for `bounded_int::trim_min` on a short string byte count: removing the minimum value
/// (zero) from `BoundedInt<0, 31>` leaves `BoundedInt<1, 31>`.
pub impl TrimMinShortStringCount of bounded_int::TrimMinHelper<BoundedInt<0, 31>> {
type Target = BoundedInt<1, 31>;
}

/// Decrements the ShortString byte count by one, or returns `None` if the count is zero.
pub fn short_string_byte_count_dec(count: BoundedInt<0, 31>) -> Option<BoundedInt<0, 31>> {
    // `trim_min` separates the zero case from the strictly positive counts.
    match bounded_int::trim_min(count) {
        crate::internal::OptionRev::None => None,
        crate::internal::OptionRev::Some(non_zero) => Some(upcast(length_minus_one(non_zero))),
    }
}

/// Result type of subtracting a `Bytes31Index` offset from a `BoundedInt<0, 31>` length.
// NOTE(review): the `-30` lower bound presumes `Bytes31Index` spans 0..=30 — confirm against its
// definition.
impl B31SubOffset of SubHelper<BoundedInt<0, 31>, Bytes31Index> {
type Result = BoundedInt<-30, 31>;
}

/// Splits the `length - offset` result range at zero: negative values fall into `LowT`,
/// non-negative values into `HighT`.
impl ConstrainB31SubOffsetPos of ConstrainHelper<B31SubOffset::Result, 0> {
type LowT = BoundedInt<-30, -1>;
type HighT = BoundedInt<0, 31>;
}

/// Subtracts `offset` from `length`.
///
/// Returns `Some(length - offset)` when the difference is non-negative (`offset <= length`), and
/// `None` when `offset` is larger than `length`.
pub fn length_sub_offset(
length: BoundedInt<0, 31>, offset: Bytes31Index,
) -> Option<BoundedInt<0, 31>> {
let diff = bounded_int::sub(length, offset);
// The constrain splits `diff` at zero; only the non-negative (`HighT`) side is kept.
bounded_int::constrain::<_, 0>(diff).err()
}

/// The information about the new pending word length and the split index.
pub enum AppendWordInfo {
/// The new pending word length is less than 31, and fits in the current pending word.
Expand Down
146 changes: 142 additions & 4 deletions corelib/src/test/byte_array_test.cairo
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#[feature("byte-span")]
use core::byte_array::{ByteSpan, ByteSpanTrait, ToByteSpanTrait};
use core::num::traits::Bounded;
use core::test::test_utils::{assert_eq, assert_ne};
use crate::byte_array::{ByteSpan, ByteSpanTrait, ToByteSpanTrait};
use crate::num::traits::Bounded;
use crate::test::test_utils::{assert_eq, assert_ne};

#[test]
fn test_append_byte() {
Expand Down Expand Up @@ -731,10 +731,148 @@ fn test_span_at_overflows() {
// Test overflow protection with large indices.
let ba: ByteArray = "test";
let span = ba.span();

assert_eq!(span.get(Bounded::<usize>::MAX), None);

let sliced = ba.span().get(1..3).unwrap();
assert_eq!(sliced.get(Bounded::<usize>::MAX - 1), None);
assert_eq!(sliced.get(Bounded::<usize>::MAX), None);
}

#[test]
fn test_byte_span_simple() {
    // An empty span yields no bytes.
    let no_bytes: ByteArray = "";
    assert_eq!(no_bytes.span().into_iter().collect(), array![]);

    // A single byte.
    let one_byte: ByteArray = "A";
    assert_eq!(one_byte.span().into_iter().collect(), array!['A']);

    // Two bytes, yielded in order.
    let two_bytes: ByteArray = "AB";
    assert_eq!(two_bytes.span().into_iter().collect(), array!['A', 'B']);
}

#[test]
fn test_byte_span_iterator_word_boundaries() {
    // Around the first word boundary: 30, 31 and 32 bytes.
    let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
    let mut tail_30 = ba_30.span().into_iter();
    tail_30.advance_by(29).unwrap();
    assert_eq!(tail_30.collect(), array!['d'], "30 bytes - last byte");

    let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
    let mut tail_31 = ba_31.span().into_iter();
    // One byte is taken explicitly, then 29 more are skipped, leaving only the last byte.
    assert_eq!(tail_31.next(), Some('A'));
    tail_31.advance_by(29).unwrap();
    assert_eq!(tail_31.collect(), array!['e'], "31 bytes - last byte");

    let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
    let mut tail_32 = ba_32.span().into_iter();
    tail_32.advance_by(30).unwrap();
    assert_eq!(tail_32.collect(), array!['e', 'f'], "32 bytes - last two bytes");

    // Around the second word boundary: 62, 63 and 64 bytes.
    let ba_62: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
    let mut tail_62 = ba_62.span().into_iter();
    tail_62.advance_by(61).unwrap();
    assert_eq!(tail_62.collect(), array!['9'], "62 bytes - last byte");

    let ba_63: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!";
    let mut tail_63 = ba_63.span().into_iter();
    tail_63.advance_by(62).unwrap();
    assert_eq!(tail_63.collect(), array!['!'], "63 bytes - last byte");

    let ba_64: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@";
    let mut tail_64 = ba_64.span().into_iter();
    tail_64.advance_by(62).unwrap();
    assert_eq!(tail_64.collect(), array!['!', '@'], "64 bytes - last two bytes");
}

#[test]
fn test_byte_span_iterator_multiple_words() {
    // Test with 3+ words to verify iteration works across multiple word boundaries.
    // 92 bytes: 31 + 31 + 30.
    let ba_92: ByteArray =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()_+-=[]{}|;':,.<>?/~`";
    let span = ba_92.span();
    assert_eq!(span.into_iter().count(), 92, "should iterate all 92 bytes");

    // Verify correctness at specific positions.
    let mut iter = span.into_iter();
    assert_eq!(iter.next(), Some('A'));

    // Skip to last byte: one byte was consumed above and 90 more are skipped here, so only the
    // 92nd byte remains.
    iter.advance_by(90).unwrap();
    assert_eq!(iter.collect(), array!['`'], "92 bytes - last byte");
}

#[test]
fn test_byte_span_iterator_for_loop_collect() {
    // Single (partial) word.
    let small_ba: ByteArray = "Hello";
    let span = small_ba.span();

    let mut rebuilt_small: ByteArray = Default::default();
    let mut seen_small = 0;
    for byte in span {
        rebuilt_small.append_byte(byte);
        seen_small += 1;
    }
    assert_eq!(rebuilt_small, small_ba);
    assert_eq!(seen_small, 5);
    assert_eq!(span.into_iter().collect(), small_ba);

    // Test with 2 words.
    let ba_40: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn";
    let mut rebuilt_40: ByteArray = Default::default();
    let mut seen_40 = 0;
    for byte in ba_40.span() {
        rebuilt_40.append_byte(byte);
        seen_40 += 1;
    }
    assert_eq!(rebuilt_40, ba_40);
    assert_eq!(seen_40, 40);
    assert_eq!(ba_40.span().into_iter().collect(), ba_40);

    // Test with 3 words.
    let ba_70: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*";
    let mut rebuilt_70: ByteArray = Default::default();
    let mut seen_70 = 0;
    for byte in ba_70.span() {
        rebuilt_70.append_byte(byte);
        seen_70 += 1;
    }
    assert_eq!(rebuilt_70, ba_70);
    assert_eq!(seen_70, 70);
    assert_eq!(ba_70.span().into_iter().collect(), ba_70);
}

#[test]
fn test_byte_span_iterator_slices() {
    // Slice within remainder word (< 31 bytes).
    let ba_13: ByteArray = "Hello Shmello";
    let within_remainder = ba_13.span().get(2..7).unwrap();
    assert_eq!(
        within_remainder.into_iter().collect(),
        array!['l', 'l', 'o', ' ', 'S'],
        "slice within remainder word",
    );

    // Iterate slice across 2 words (1 data + remainder).
    let ba_33: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
    let across_two = ba_33.span().get(27..32).unwrap();
    assert_eq!(
        across_two.into_iter().collect(),
        array!['b', 'c', 'd', 'e', 'f'],
        "slice across 2 words",
    );

    // Iterate slice across 3 words.
    let ba_66: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$";
    let across_three = ba_66.span().get(29..64).unwrap();
    assert_eq!(
        across_three.into_iter().collect(),
        array![
            'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
            'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!',
            '@',
        ],
        "slice across 3 words",
    );

    // Test case where data array is exhausted and remainder has exactly 1 byte.
    let ba_32: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef";
    let last_byte_only = ba_32.span().get(31..32).unwrap();
    assert_eq!(
        last_byte_only.into_iter().collect(),
        array!['f'],
        "1 byte in remainder after data exhausted",
    );
}