Skip to content

Commit 235db8b

Browse files
author
Gilad Chase
committed
feat(byte_array): add slice
1. When a slice ends before a word boundary (its last word isn't a full 31 bytes long), its last word is copied into the remainder word. Rationale: this is consistent with `ByteArray`'s `pending word`, and allows slices of full bytes31 that include an end_suffix to be shifted right without allocating a new array. 2. When a slice includes a start offset, the offset is applied lazily, only upon `into`ing into a `ByteArray`; otherwise it is only recorded in the `first_char_start_offset` field.
1 parent 9c5f473 commit 235db8b

File tree

2 files changed

+210
-9
lines changed

2 files changed

+210
-9
lines changed

corelib/src/byte_array.cairo

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ use crate::cmp::min;
5555
use crate::integer::{U32TryIntoNonZero, u128_safe_divmod};
5656
#[feature("bounded-int-utils")]
5757
use crate::internal::bounded_int::{BoundedInt, downcast, upcast};
58+
use crate::num::traits::CheckedAdd;
5859
#[allow(unused_imports)]
5960
use crate::serde::Serde;
6061
use crate::traits::{Into, TryInto};
@@ -651,6 +652,38 @@ pub impl ByteArraySpanImpl of ByteSpanTrait {
651652
fn is_empty(self: @ByteSpan) -> bool {
652653
self.data.len() == 0 && *self.remainder_len == 0
653654
}
655+
656+
/// Returns a slice of `len` bytes starting at `start`; `None` if `start + len` overflows or exceeds `self.len()`. A zero-length slice is always `Some` (empty).
657+
fn slice(self: @ByteSpan, start: usize, len: usize) -> Option<ByteSpan> {
658+
if len == 0 {
659+
return Some(Default::default());
660+
}
661+
if start.checked_add(len)? > self.len() {
662+
return None;
663+
}
664+
665+
let abs_start = start + upcast(*self.first_char_start_offset);
666+
let (start_word, start_offset) = DivRem::div_rem(abs_start, BYTES_IN_BYTES31_NONZERO);
667+
let (end_word, end_offset) = DivRem::div_rem(abs_start + len, BYTES_IN_BYTES31_NONZERO);
668+
let data_len = self.data.len();
669+
670+
let remainder_with_end_offset_trimmed = if end_word < data_len {
671+
let word = (*self.data[end_word]).into();
672+
shift_right(word, BYTES_IN_BYTES31, BYTES_IN_BYTES31 - end_offset)
673+
} else {
674+
let remainder_len = upcast(*self.remainder_len);
675+
shift_right(*self.remainder_word, remainder_len, remainder_len - end_offset)
676+
};
677+
678+
return Some(
679+
ByteSpan {
680+
data: self.data.slice(start_word, min(end_word, data_len) - start_word),
681+
first_char_start_offset: downcast(start_offset).unwrap(),
682+
remainder_word: remainder_with_end_offset_trimmed,
683+
remainder_len: downcast(end_offset).unwrap(),
684+
},
685+
);
686+
}
654687
}
655688

656689
impl ByteSpanDefault of Default<ByteSpan> {
@@ -718,3 +751,18 @@ impl ByteSpanIntoByteArray of Into<ByteSpan, ByteArray> {
718751
ba
719752
}
720753
}
754+
755+
/// Shifts a word right by `n_bytes`.
756+
/// The input `bytes31` and the output `bytes31`s are represented using `felt252`s to improve
757+
/// performance.
758+
///
759+
/// Note: this function assumes that:
760+
/// 1. `word` is validly convertible to a `bytes31` which has no more than `word_len` bytes of data.
761+
/// 2. `n_bytes <= word_len`.
762+
/// 3. `word_len <= BYTES_IN_BYTES31`.
763+
/// If these assumptions are not met, it can corrupt the result. Thus, this should be a
764+
/// private function. We could add masking/assertions but it would be more expensive.
765+
fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
766+
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
767+
after_shift_right
768+
}

corelib/src/test/byte_array_test.cairo

Lines changed: 162 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::byte_array::{ByteSpanTrait, ToByteSpanTrait};
2+
use crate::num::traits::Bounded;
23
use crate::test::test_utils::{assert_eq, assert_ne};
34

45
#[test]
@@ -21,7 +22,10 @@ fn test_append_word() {
2122
assert_eq!(ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg", "append word overflowing pending word");
2223

2324
ba.append_word('hi', 2);
24-
assert_eq!(ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi", "append word extending new pending word");
25+
assert_eq!(
26+
ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi", "append word extending new pending
27+
word",
28+
);
2529

2630
// Length is 0, so nothing is actually appended.
2731
ba.append_word('jk', 0);
@@ -507,7 +511,6 @@ fn test_from_collect() {
507511
assert_eq!(ba, "hello");
508512
}
509513

510-
// TODO(giladchase): add dedicated is_empty test once we have `slice`.
511514
#[test]
512515
fn test_span_len() {
513516
// Test simple happy flow --- value is included in the last word.
@@ -517,28 +520,60 @@ fn test_span_len() {
517520
assert_eq!(span.len(), 1);
518521
assert!(!span.is_empty());
519522

523+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
524+
let span = ba_31.span();
525+
assert_eq!(span.len(), 31, "wrong span len");
526+
assert!(!span.is_empty());
527+
520528
// Test empty.
521529
let empty_ba: ByteArray = "";
522530
let empty_span = empty_ba.span();
523531
assert_eq!(empty_span.len(), 0);
524532
assert!(empty_span.is_empty());
525533

526-
// TODO(giladchase): Add start-offset using slice once supported.
527534
// First word in the array, second in last word.
528535
let two_byte31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
529-
let mut single_span = two_byte31.span();
530-
assert_eq!(single_span.len(), 33, "len error with start offset");
536+
let mut single_span = two_byte31.span().slice(1, 32).unwrap();
537+
assert_eq!(single_span.len(), 32, "len error with start offset");
531538
assert!(!single_span.is_empty());
532539

533-
// TODO(giladchase): Add start-offset using slice once supported.
534540
// First word in the array, second in the array, third in last word.
535541
let three_bytes31: ByteArray =
536542
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 chars.
537-
let mut three_span = three_bytes31.span();
538-
assert_eq!(three_span.len(), 64, "len error with size-3 bytearray");
543+
let mut three_span = three_bytes31.span().slice(1, 63).unwrap();
544+
assert_eq!(three_span.len(), 63, "len error with size-3 bytearray");
539545
assert!(!three_span.is_empty());
540546
}
541547

548+
#[test]
549+
fn test_span_slice_is_empty() {
550+
let ba: ByteArray = "hello";
551+
let span = ba.span();
552+
553+
let empty = span.slice(2, 0).unwrap();
554+
assert_eq!(empty.len(), 0);
555+
assert!(empty.is_empty());
556+
557+
let empty_string: ByteArray = "";
558+
assert_eq!(empty_string, empty.into());
559+
560+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
561+
let span = ba_31.span();
562+
assert!(span.slice(30, 0).unwrap().is_empty());
563+
assert!(span.slice(31, 0).unwrap().is_empty());
564+
assert!(!span.slice(30, 1).unwrap().is_empty());
565+
assert!(!span.slice(15, 16).unwrap().is_empty());
566+
assert!(!span.slice(16, 15).unwrap().is_empty());
567+
568+
let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
569+
let span = ba_30.span();
570+
assert!(span.slice(29, 0).unwrap().is_empty());
571+
assert!(span.slice(30, 0).unwrap().is_empty());
572+
assert!(!span.slice(29, 1).unwrap().is_empty());
573+
assert!(!span.slice(14, 15).unwrap().is_empty());
574+
assert!(!span.slice(15, 14).unwrap().is_empty());
575+
}
576+
542577
#[test]
543578
fn test_span_copy() {
544579
let ba: ByteArray = "12";
@@ -559,6 +594,100 @@ fn test_span_copy() {
559594
assert_eq!(ba, span.into());
560595
}
561596

597+
#[test]
598+
fn test_span_slice_empty() {
599+
let ba: ByteArray = "hello";
600+
let span = ba.span();
601+
602+
let empty = span.slice(2, 0).unwrap();
603+
assert_eq!(empty.len(), 0);
604+
assert!(empty.is_empty());
605+
606+
let empty_string: ByteArray = "";
607+
assert_eq!(empty_string, empty.into());
608+
}
609+
610+
// TODO(giladchase): replace assert+is_none with assert_eq when we have PartialEq.
611+
#[test]
612+
fn test_span_slice_out_of_bounds() {
613+
let ba: ByteArray = "hello";
614+
let span = ba.span();
615+
616+
assert!(span.slice(3, 5).is_none(), "end out of bounds");
617+
assert!(span.slice(6, 1).is_none(), "start out of bounds");
618+
619+
assert!(
620+
span.slice(1, 3).unwrap().slice(Bounded::<usize>::MAX, 1).is_none(),
621+
"start offset overflow",
622+
);
623+
assert!(span.slice(Bounded::<usize>::MAX, 1).is_none());
624+
assert!(span.slice(1, Bounded::<usize>::MAX).is_none());
625+
626+
let empty_string: ByteArray = "";
627+
assert!(empty_string.span().slice(0, 2).is_none(), "empty slice is sliceable");
628+
}
629+
630+
#[test]
631+
fn test_span_slice_under_31_bytes() {
632+
// Word entirely in remainder word.
633+
let ba: ByteArray = "abcde";
634+
let span = ba.span();
635+
636+
let mut slice: ByteArray = span.slice(0, 3).unwrap().into();
637+
assert_eq!(slice, "abc", "first 3 bytes");
638+
639+
slice = span.slice(2, 2).unwrap().into();
640+
assert_eq!(slice, "cd", "middle 2 bytes");
641+
642+
slice = span.slice(4, 1).unwrap().into();
643+
assert_eq!(slice, "e", "last byte");
644+
}
645+
#[test]
646+
fn test_span_slice_exactly_31_bytes() {
647+
// 1 full data word, empty last_word.
648+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde"; // 31 bytes
649+
let span31 = ba_31.span();
650+
assert_eq!(span31.len(), 31);
651+
652+
let ba: ByteArray = span31.slice(0, 31).unwrap().into();
653+
assert_eq!(ba, ba_31);
654+
655+
// Sanity check: the bytes at positions 10 and 11 are 'K' and 'L'.
656+
assert_eq!(ba_31.at(10), Some('K'));
657+
assert_eq!(ba_31.at(11), Some('L'));
658+
659+
// Partial slice
660+
let ba: ByteArray = span31.slice(10, 10).unwrap().into();
661+
assert_eq!(ba, "KLMNOPQRST", "middle 10 bytes");
662+
}
663+
664+
#[test]
665+
fn test_span_slice_positions() {
666+
// Two full bytes31 + remainder with 2 bytes.
667+
let ba: ByteArray =
668+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 bytes
669+
let span = ba.span();
670+
671+
// Slice from middle of first word to middle of second word.
672+
let short_slice_across_data_words = span.slice(10, 30).unwrap();
673+
let mut ba_from_span: ByteArray = short_slice_across_data_words.into();
674+
assert_eq!(ba_from_span, "KLMNOPQRSTUVWXYZabcdefghijklmn", "multi-word short slice failed");
675+
676+
// Slice spanning multiple words.
677+
let long_slice_across_data_words = span.slice(5, 50).unwrap();
678+
ba_from_span = long_slice_across_data_words.into();
679+
assert_eq!(
680+
ba_from_span,
681+
"FGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012",
682+
"multi-word long slice failed",
683+
);
684+
685+
// Slice from the end of the first word into the middle of the second word (bytes 29..49).
686+
let short_slice_into_remainder_word = span.slice(29, 20).unwrap();
687+
ba_from_span = short_slice_into_remainder_word.into();
688+
assert_eq!(ba_from_span, "defghijklmnopqrstuvw", "short slice into remainder word failed");
689+
}
690+
562691
#[test]
563692
fn test_span_into_bytearray() {
564693
let empty_ba: ByteArray = "";
@@ -571,5 +700,29 @@ fn test_span_into_bytearray() {
571700
// Data word and remainder.
572701
let large_ba: ByteArray = "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW"; // 40 bytes
573702
assert_eq!(large_ba.span().into(), large_ba);
574-
// TODO(giladchase): test with slice.
703+
704+
// Test sliced span with offset.
705+
let sliced: ByteArray = large_ba.span().slice(10, 25).unwrap().into();
706+
assert_eq!(sliced, ":;<=>?@ABCDEFGHIJKLMNOPQR");
575707
}
708+
709+
#[test]
710+
fn test_span_multiple_start_offset_slicing() {
711+
// Test to demonstrate the optimization of lazy start-offset trimming.
712+
// Multiple slicing operations on remainder word should be more efficient.
713+
let ba: ByteArray = "abcdef"; // 6 bytes in remainder
714+
let span = ba.span();
715+
716+
let slice1 = span.slice(1, 5).unwrap();
717+
let slice2 = slice1.slice(1, 4).unwrap();
718+
let slice3 = slice2.slice(1, 3).unwrap();
719+
720+
// Convert to ByteArray to verify correctness
721+
let result1: ByteArray = slice1.into();
722+
assert_eq!(result1, "bcdef", "first slice");
723+
let result2: ByteArray = slice2.into();
724+
assert_eq!(result2, "cdef", "second slice");
725+
let result3: ByteArray = slice3.into();
726+
assert_eq!(result3, "def", "third slice");
727+
}
728+

0 commit comments

Comments
 (0)