feat(byte_array): add slice

Gilad Chase · Gilad Chase · commit 235db8bddb28 · 2025-09-18T09:56:17.000+03:00
1. When a slice ends before a word boundary (it's last word isn't a full 31 bytes long)
, it's last word is copied into the remainder word.
Rationale: this is consistent with `ByteArray`'s `pending word`, and allows slices of full bytes31
that include an end_suffix to be shifted-right without allocating a new array.

2. When slices include a start-offset, the offset is applied lazily only upon `into`ing into a
   `ByteArray`, otherwise it's only recorded in the `first_char_start_offset` field.
diff --git a/corelib/src/byte_array.cairo b/corelib/src/byte_array.cairo
@@ -55,6 +55,7 @@ use crate::cmp::min;
 use crate::integer::{U32TryIntoNonZero, u128_safe_divmod};
 #[feature("bounded-int-utils")]
 use crate::internal::bounded_int::{BoundedInt, downcast, upcast};
+use crate::num::traits::CheckedAdd;
 #[allow(unused_imports)]
 use crate::serde::Serde;
 use crate::traits::{Into, TryInto};
@@ -651,6 +652,38 @@ pub impl ByteArraySpanImpl of ByteSpanTrait {
     fn is_empty(self: @ByteSpan) -> bool {
         self.data.len() == 0 && *self.remainder_len == 0
     }
+
+    /// Returns a slice of the ByteSpan from the given start position with the given length.
+    fn slice(self: @ByteSpan, start: usize, len: usize) -> Option<ByteSpan> {
+        if len == 0 {
+            return Some(Default::default());
+        }
+        if start.checked_add(len)? > self.len() {
+            return None;
+        }
+
+        let abs_start = start + upcast(*self.first_char_start_offset);
+        let (start_word, start_offset) = DivRem::div_rem(abs_start, BYTES_IN_BYTES31_NONZERO);
+        let (end_word, end_offset) = DivRem::div_rem(abs_start + len, BYTES_IN_BYTES31_NONZERO);
+        let data_len = self.data.len();
+
+        let remainder_with_end_offset_trimmed = if end_word < data_len {
+            let word = (*self.data[end_word]).into();
+            shift_right(word, BYTES_IN_BYTES31, BYTES_IN_BYTES31 - end_offset)
+        } else {
+            let remainder_len = upcast(*self.remainder_len);
+            shift_right(*self.remainder_word, remainder_len, remainder_len - end_offset)
+        };
+
+        return Some(
+            ByteSpan {
+                data: self.data.slice(start_word, min(end_word, data_len) - start_word),
+                first_char_start_offset: downcast(start_offset).unwrap(),
+                remainder_word: remainder_with_end_offset_trimmed,
+                remainder_len: downcast(end_offset).unwrap(),
+            },
+        );
+    }
 }
 
 impl ByteSpanDefault of Default<ByteSpan> {
@@ -718,3 +751,18 @@ impl ByteSpanIntoByteArray of Into<ByteSpan, ByteArray> {
         ba
     }
 }
+
+/// Shifts a word right by `n_bytes`.
+/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
+/// performance.
+///
+/// Note: this function assumes that:
+/// 1. `word` is validly convertible to a `bytes31` which has no more than `word_len` bytes of data.
+/// 2. `n_bytes <= word_len`.
+/// 3. `word_len <= BYTES_IN_BYTES31`.
+/// If these assumptions are not met, it can corrupt the result. Thus, this should be a
+/// private function. We could add masking/assertions but it would be more expensive.
+fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
+    let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
+    after_shift_right
+}
diff --git a/corelib/src/test/byte_array_test.cairo b/corelib/src/test/byte_array_test.cairo
@@ -1,4 +1,5 @@
 use crate::byte_array::{ByteSpanTrait, ToByteSpanTrait};
+use crate::num::traits::Bounded;
 use crate::test::test_utils::{assert_eq, assert_ne};
 
 #[test]
@@ -21,7 +22,10 @@ fn test_append_word() {
     assert_eq!(ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg", "append word overflowing pending word");
 
     ba.append_word('hi', 2);
-    assert_eq!(ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi", "append word extending new pending word");
+    assert_eq!(
+        ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi", "append word extending new pending
+    word",
+    );
 
     // Length is 0, so nothing is actually appended.
     ba.append_word('jk', 0);
@@ -507,7 +511,6 @@ fn test_from_collect() {
     assert_eq!(ba, "hello");
 }
 
-// TODO(giladchase): add dedicated is_empty test once we have `slice`.
 #[test]
 fn test_span_len() {
     // Test simple happy flow --- value is included in the last word.
@@ -517,28 +520,60 @@ fn test_span_len() {
     assert_eq!(span.len(), 1);
     assert!(!span.is_empty());
 
+    let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
+    let span = ba_31.span();
+    assert_eq!(span.len(), 31, "wrong span len");
+    assert!(!span.is_empty());
+
     // Test empty.
     let empty_ba: ByteArray = "";
     let empty_span = empty_ba.span();
     assert_eq!(empty_span.len(), 0);
     assert!(empty_span.is_empty());
 
-    // TODO(giladchase): Add start-offset using slice once supported.
     // First word in the array, second in last word.
     let two_byte31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
-    let mut single_span = two_byte31.span();
-    assert_eq!(single_span.len(), 33, "len error with start offset");
+    let mut single_span = two_byte31.span().slice(1, 32).unwrap();
+    assert_eq!(single_span.len(), 32, "len error with start offset");
     assert!(!single_span.is_empty());
 
-    // TODO(giladchase): Add start-offset using slice once supported.
     // First word in the array, second in the array, third in last word.
     let three_bytes31: ByteArray =
         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 chars.
-    let mut three_span = three_bytes31.span();
-    assert_eq!(three_span.len(), 64, "len error with size-3 bytearray");
+    let mut three_span = three_bytes31.span().slice(1, 63).unwrap();
+    assert_eq!(three_span.len(), 63, "len error with size-3 bytearray");
     assert!(!three_span.is_empty());
 }
 
+#[test]
+fn test_span_slice_is_empty() {
+    let ba: ByteArray = "hello";
+    let span = ba.span();
+
+    let empty = span.slice(2, 0).unwrap();
+    assert_eq!(empty.len(), 0);
+    assert!(empty.is_empty());
+
+    let empty_string: ByteArray = "";
+    assert_eq!(empty_string, empty.into());
+
+    let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
+    let span = ba_31.span();
+    assert!(span.slice(30, 0).unwrap().is_empty());
+    assert!(span.slice(31, 0).unwrap().is_empty());
+    assert!(!span.slice(30, 1).unwrap().is_empty());
+    assert!(!span.slice(15, 16).unwrap().is_empty());
+    assert!(!span.slice(16, 15).unwrap().is_empty());
+
+    let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
+    let span = ba_30.span();
+    assert!(span.slice(29, 0).unwrap().is_empty());
+    assert!(span.slice(30, 0).unwrap().is_empty());
+    assert!(!span.slice(29, 1).unwrap().is_empty());
+    assert!(!span.slice(14, 15).unwrap().is_empty());
+    assert!(!span.slice(15, 14).unwrap().is_empty());
+}
+
 #[test]
 fn test_span_copy() {
     let ba: ByteArray = "12";
@@ -559,6 +594,100 @@ fn test_span_copy() {
     assert_eq!(ba, span.into());
 }
 
+#[test]
+fn test_span_slice_empty() {
+    let ba: ByteArray = "hello";
+    let span = ba.span();
+
+    let empty = span.slice(2, 0).unwrap();
+    assert_eq!(empty.len(), 0);
+    assert!(empty.is_empty());
+
+    let empty_string: ByteArray = "";
+    assert_eq!(empty_string, empty.into());
+}
+
+// TODO(giladchase): replace assert+is_none with assert_eq when we have PartialEq.
+#[test]
+fn test_span_slice_out_of_bounds() {
+    let ba: ByteArray = "hello";
+    let span = ba.span();
+
+    assert!(span.slice(3, 5).is_none(), "end out of bounds");
+    assert!(span.slice(6, 1).is_none(), "start out of bounds");
+
+    assert!(
+        span.slice(1, 3).unwrap().slice(Bounded::<usize>::MAX, 1).is_none(),
+        "start offset overflow",
+    );
+    assert!(span.slice(Bounded::<usize>::MAX, 1).is_none());
+    assert!(span.slice(1, Bounded::<usize>::MAX).is_none());
+
+    let empty_string: ByteArray = "";
+    assert!(empty_string.span().slice(0, 2).is_none(), "empty slice is sliceable");
+}
+
+#[test]
+fn test_span_slice_under_31_bytes() {
+    // Word entirely in remainder word.
+    let ba: ByteArray = "abcde";
+    let span = ba.span();
+
+    let mut slice: ByteArray = span.slice(0, 3).unwrap().into();
+    assert_eq!(slice, "abc", "first 3 bytes");
+
+    slice = span.slice(2, 2).unwrap().into();
+    assert_eq!(slice, "cd", "middle 2 bytes");
+
+    slice = span.slice(4, 1).unwrap().into();
+    assert_eq!(slice, "e", "last byte");
+}
+#[test]
+fn test_span_slice_exactly_31_bytes() {
+    // 1 full data word, empty last_word.
+    let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde"; // 31 bytes
+    let span31 = ba_31.span();
+    assert_eq!(span31.len(), 31);
+
+    let ba: ByteArray = span31.slice(0, 31).unwrap().into();
+    assert_eq!(ba, ba_31);
+
+    // Debug: Let's check what byte is at position 10
+    assert_eq!(ba_31.at(10), Some('K'));
+    assert_eq!(ba_31.at(11), Some('L'));
+
+    // Partial slice
+    let ba: ByteArray = span31.slice(10, 10).unwrap().into();
+    assert_eq!(ba, "KLMNOPQRST", "middle 10 bytes");
+}
+
+#[test]
+fn test_span_slice_positions() {
+    // Two full bytes31 + remainder with 2 bytes.
+    let ba: ByteArray =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 bytes
+    let span = ba.span();
+
+    // Slice from middle of first word to middle of second word.
+    let short_slice_across_data_words = span.slice(10, 30).unwrap();
+    let mut ba_from_span: ByteArray = short_slice_across_data_words.into();
+    assert_eq!(ba_from_span, "KLMNOPQRSTUVWXYZabcdefghijklmn", "multi-word short slice failed");
+
+    // Slice spanning multiple words.
+    let long_slice_across_data_words = span.slice(5, 50).unwrap();
+    ba_from_span = long_slice_across_data_words.into();
+    assert_eq!(
+        ba_from_span,
+        "FGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012",
+        "multi-word long slice failed",
+    );
+
+    // Slice from second word into remainder.
+    let short_slice_into_remainder_word = span.slice(29, 20).unwrap();
+    ba_from_span = short_slice_into_remainder_word.into();
+    assert_eq!(ba_from_span, "defghijklmnopqrstuvw", "short slice into remainder word failed");
+}
+
 #[test]
 fn test_span_into_bytearray() {
     let empty_ba: ByteArray = "";
@@ -571,5 +700,29 @@ fn test_span_into_bytearray() {
     // Data word and remainder.
     let large_ba: ByteArray = "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW"; // 40 bytes
     assert_eq!(large_ba.span().into(), large_ba);
-    // TODO(giladchase): test with slice.
+
+    // Test sliced span with offset.
+    let sliced: ByteArray = large_ba.span().slice(10, 25).unwrap().into();
+    assert_eq!(sliced, ":;<=>?@ABCDEFGHIJKLMNOPQR");
 }
+
+#[test]
+fn test_span_multiple_start_offset_slicing() {
+    // Test to demonstrate the optimization of lazy start-offset trimming.
+    // Multiple slicing operations on remainder word should be more efficient.
+    let ba: ByteArray = "abcdef"; // 10 bytes in remainder
+    let span = ba.span();
+
+    let slice1 = span.slice(1, 5).unwrap();
+    let slice2 = slice1.slice(1, 4).unwrap();
+    let slice3 = slice2.slice(1, 3).unwrap();
+
+    // Convert to ByteArray to verify correctness
+    let result1: ByteArray = slice1.into();
+    assert_eq!(result1, "bcdef", "first slice");
+    let result2: ByteArray = slice2.into();
+    assert_eq!(result2, "cdef", "second slice");
+    let result3: ByteArray = slice3.into();
+    assert_eq!(result3, "def", "third slice");
+}
+