Skip to content

Commit d2487b7

Browse files
author
Gilad Chase
committed
feat(byte_array): add slice
1. When a slice ends before a word boundary (its last word isn't a full 31 bytes long), its last word is copied into the remainder word. Rationale: this is consistent with `ByteArray`'s `pending word`, and allows slices of full bytes31 that include an end_suffix to be shifted right without allocating a new array. 2. When slices include a start offset, the offset is applied lazily, only upon converting (`into`) to a `ByteArray`; otherwise it's only recorded in the `first_char_start_offset` field.
1 parent 558ac51 commit d2487b7

File tree

2 files changed

+181
-8
lines changed

2 files changed

+181
-8
lines changed

corelib/src/byte_array.cairo

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ use crate::cmp::min;
5555
use crate::integer::{U32TryIntoNonZero, u128_safe_divmod};
5656
#[feature("bounded-int-utils")]
5757
use crate::internal::bounded_int::{BoundedInt, downcast, upcast};
58+
use crate::num::traits::CheckedAdd;
5859
#[allow(unused_imports)]
5960
use crate::serde::Serde;
6061
use crate::traits::{Into, TryInto};
@@ -651,6 +652,38 @@ pub impl ByteArraySpanImpl of ByteSpanTrait {
651652
fn is_empty(self: @ByteSpan) -> bool {
652653
self.len() == 0
653654
}
655+
656+
/// Returns a sub-span of `len` bytes that begins `start` bytes into this `ByteSpan`.
///
/// Returns an empty (default) span when `len == 0`; `start` is not validated in that case.
/// Returns `None` when `start + len` overflows `usize` or exceeds `self.len()`.
///
/// The start offset is not applied to the data here: it is only recorded in the result's
/// `first_char_start_offset`. The (possibly partial) last word of the slice is stored in the
/// result's `remainder_word`, with its byte count in `remainder_len`.
657+
fn slice(self: @ByteSpan, start: usize, len: usize) -> Option<ByteSpan> {
658+
// Zero-length request: short-circuit before any bounds checking.
if len == 0 {
659+
return Some(Default::default());
660+
}
661+
// `checked_add(..)?` yields `None` on overflow; the comparison rejects out-of-range ends.
if start.checked_add(len)? > self.len() {
662+
return None;
663+
}
664+
665+
// Start position measured from the beginning of `self.data`, i.e. including this span's own
// recorded start offset.
let abs_start = start + upcast(*self.first_char_start_offset);
666+
// Split the absolute start/end byte positions into (word index, byte offset inside that word).
let (start_word, start_offset) = DivRem::div_rem(abs_start, BYTES_IN_BYTES31_NONZERO);
667+
let (end_word, end_offset) = DivRem::div_rem(abs_start + len, BYTES_IN_BYTES31_NONZERO);
668+
let data_len = self.data.len();
669+
670+
// The slice's last word is taken from `data` when the slice ends inside a data word, and from
// this span's remainder word otherwise. Either way it is shifted right by the number of bytes
// that lie past the slice end (`word_len - end_offset`).
let remainder_with_end_offset_trimmed = if end_word < data_len {
671+
let word = (*self.data[end_word]).into();
672+
shift_right(word, BYTES_IN_BYTES31, BYTES_IN_BYTES31 - end_offset)
673+
} else {
674+
let remainder_len = upcast(*self.remainder_len);
675+
shift_right(*self.remainder_word, remainder_len, remainder_len - end_offset)
676+
};
677+
678+
return Some(
679+
ByteSpan {
680+
// Data words from `start_word` up to (not including) the word holding the slice end;
// `min` caps that bound at `data_len` when the slice ends in the remainder word.
data: self.data.slice(start_word, min(end_word, data_len) - start_word),
681+
// NOTE(review): both offsets are remainders of a division by `BYTES_IN_BYTES31_NONZERO`,
// so the `downcast`s are presumably infallible - confirm against the field types.
first_char_start_offset: downcast(start_offset).unwrap(),
682+
remainder_word: remainder_with_end_offset_trimmed,
683+
remainder_len: downcast(end_offset).unwrap(),
684+
},
685+
);
686+
}
654687
}
655688

656689
impl ByteSpanDefault of Default<ByteSpan> {
@@ -718,3 +751,18 @@ impl ByteSpanIntoByteArray of Into<ByteSpan, ByteArray> {
718751
ba
719752
}
720753
}
754+
755+
/// Shifts `word` right by `n_bytes` bytes and returns the shifted value.
756+
/// Both the input and the output `bytes31` values are represented as `felt252`s to improve
757+
/// performance.
758+
///
759+
/// Note: this function assumes that:
760+
/// 1. `word` is validly convertible to a `bytes31` which has no more than `word_len` bytes of data.
761+
/// 2. `n_bytes <= word_len`.
762+
/// 3. `word_len <= BYTES_IN_BYTES31`.
763+
/// If these assumptions are not met, the result can be corrupted. Thus, this should remain a
764+
/// private function. We could add masking/assertions, but that would be more expensive.
765+
fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
766+
// `split_bytes31` returns a pair; the first component is discarded and only the second one
// is returned.
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
767+
after_shift_right
768+
}

corelib/src/test/byte_array_test.cairo

Lines changed: 133 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::byte_array::{ByteSpanTrait, ToByteSpanTrait};
2+
use crate::num::traits::Bounded;
23
use crate::test::test_utils::{assert_eq, assert_ne};
34

45
#[test]
@@ -21,7 +22,10 @@ fn test_append_word() {
2122
assert_eq!(ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg", "append word overflowing pending word");
2223

2324
ba.append_word('hi', 2);
24-
assert_eq!(ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi", "append word extending new pending word");
25+
assert_eq!(
26+
ba, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi", "append word extending new pending
27+
word",
28+
);
2529

2630
// Length is 0, so nothing is actually appended.
2731
ba.append_word('jk', 0);
@@ -516,25 +520,28 @@ fn test_span_len() {
516520
assert_eq!(span.len(), 1);
517521
assert!(!span.is_empty());
518522

523+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
524+
let span = ba_31.span();
525+
assert_eq!(span.len(), 31, "wrong span len");
526+
assert!(!span.is_empty());
527+
519528
// Test empty.
520529
let empty_ba: ByteArray = "";
521530
let empty_span = empty_ba.span();
522531
assert_eq!(empty_span.len(), 0);
523532
assert!(empty_span.is_empty());
524533

525-
// TODO(giladchase): Add start-offset using slice once supported.
526534
// First word in the array, second in last word.
527535
let two_byte31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
528-
let mut single_span = two_byte31.span();
529-
assert_eq!(single_span.len(), 33, "len error with start offset");
536+
let mut single_span = two_byte31.span().slice(1, 32).unwrap();
537+
assert_eq!(single_span.len(), 32, "len error with start offset");
530538
assert!(!single_span.is_empty());
531539

532-
// TODO(giladchase): Add start-offset using slice once supported.
533540
// First word in the array, second in the array, third in last word.
534541
let three_bytes31: ByteArray =
535542
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 chars.
536-
let mut three_span = three_bytes31.span();
537-
assert_eq!(three_span.len(), 64, "len error with size-3 bytearray");
543+
let mut three_span = three_bytes31.span().slice(1, 63).unwrap();
544+
assert_eq!(three_span.len(), 63, "len error with size-3 bytearray");
538545
assert!(!three_span.is_empty());
539546
}
540547

@@ -558,6 +565,100 @@ fn test_span_copy() {
558565
assert_eq!(ba, span.into());
559566
}
560567

568+
// Checks that a zero-length slice is an empty span that converts to an empty `ByteArray`.
#[test]
569+
fn test_span_slice_empty() {
570+
let ba: ByteArray = "hello";
571+
let span = ba.span();
572+
573+
// A zero-length slice taken at an in-bounds start position.
let empty = span.slice(2, 0).unwrap();
574+
assert_eq!(empty.len(), 0);
575+
assert!(empty.is_empty());
576+
577+
// Converting the empty slice must yield the empty string.
let empty_string: ByteArray = "";
578+
assert_eq!(empty_string, empty.into());
579+
}
580+
581+
// TODO(giladchase): replace assert+is_none with assert_eq when we have PartialEq.
582+
// Checks that `slice` returns `None` for out-of-range or overflowing `start`/`len` values.
#[test]
583+
fn test_span_slice_out_of_bounds() {
584+
let ba: ByteArray = "hello";
585+
let span = ba.span();
586+
587+
// The span is 5 bytes long: 3 + 5 and 6 + 1 both reach past its end.
assert!(span.slice(3, 5).is_none(), "end out of bounds");
588+
assert!(span.slice(6, 1).is_none(), "start out of bounds");
589+
590+
// Same overflow check, but on a span that already carries a start offset of 1.
assert!(
591+
span.slice(1, 3).unwrap().slice(Bounded::<usize>::MAX, 1).is_none(),
592+
"start offset overflow",
593+
);
594+
// `start + len` overflows `usize`; the expected outcome is `None`.
assert!(span.slice(Bounded::<usize>::MAX, 1).is_none());
595+
assert!(span.slice(1, Bounded::<usize>::MAX).is_none());
596+
597+
// A non-empty slice of an empty span is out of bounds.
let empty_string: ByteArray = "";
598+
assert!(empty_string.span().slice(0, 2).is_none(), "empty slice is sliceable");
599+
}
600+
601+
// Checks slices taken at the start, middle and end of a 5-byte string.
#[test]
602+
fn test_span_slice_under_31_bytes() {
603+
// The whole string is shorter than one 31-byte word, so it lives entirely in the remainder word.
604+
let ba: ByteArray = "abcde";
605+
let span = ba.span();
606+
607+
// Slice at the very start.
let mut slice: ByteArray = span.slice(0, 3).unwrap().into();
608+
assert_eq!(slice, "abc", "first 3 bytes");
609+
610+
// Slice strictly inside the string.
slice = span.slice(2, 2).unwrap().into();
611+
assert_eq!(slice, "cd", "middle 2 bytes");
612+
613+
// Slice ending exactly at the end of the string.
slice = span.slice(4, 1).unwrap().into();
614+
assert_eq!(slice, "e", "last byte");
615+
}
616+
// Checks a full-length slice and a partial slice of a string that is exactly 31 bytes long.
#[test]
617+
fn test_span_slice_exactly_31_bytes() {
618+
// Exactly one full data word and an empty remainder word.
619+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde"; // 31 bytes
620+
let span31 = ba_31.span();
621+
assert_eq!(span31.len(), 31);
622+
623+
// Slicing the full length must reproduce the original string.
let ba: ByteArray = span31.slice(0, 31).unwrap().into();
624+
assert_eq!(ba, ba_31);
625+
626+
// Sanity-check the bytes at positions 10 and 11 before slicing from position 10.
627+
assert_eq!(ba_31.at(10), Some('K'));
628+
assert_eq!(ba_31.at(11), Some('L'));
629+
630+
// Partial slice: 10 bytes starting at position 10.
631+
let ba: ByteArray = span31.slice(10, 10).unwrap().into();
632+
assert_eq!(ba, "KLMNOPQRST", "middle 10 bytes");
633+
}
634+
635+
// Checks slices that cross word boundaries of a 64-byte string.
#[test]
636+
fn test_span_slice_positions() {
637+
// Two full bytes31 words (62 bytes) + a remainder word with 2 bytes.
638+
let ba: ByteArray =
639+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 bytes
640+
let span = ba.span();
641+
642+
// Bytes 10..39: from the middle of the first word to the middle of the second word.
643+
let short_slice_across_data_words = span.slice(10, 30).unwrap();
644+
let mut ba_from_span: ByteArray = short_slice_across_data_words.into();
645+
assert_eq!(ba_from_span, "KLMNOPQRSTUVWXYZabcdefghijklmn", "multi-word short slice failed");
646+
647+
// Bytes 5..54: a longer slice that also spans the first and second words.
648+
let long_slice_across_data_words = span.slice(5, 50).unwrap();
649+
ba_from_span = long_slice_across_data_words.into();
650+
assert_eq!(
651+
ba_from_span,
652+
"FGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz012",
653+
"multi-word long slice failed",
654+
);
655+
656+
// Bytes 29..48: starts in the last two bytes of the first word and ends inside the second
// word. The original 2-byte remainder is not reached.
// NOTE(review): "remainder word" in the names/messages below presumably refers to the
// slice's own remainder word, which receives the partial last word - confirm.
657+
let short_slice_into_remainder_word = span.slice(29, 20).unwrap();
658+
ba_from_span = short_slice_into_remainder_word.into();
659+
assert_eq!(ba_from_span, "defghijklmnopqrstuvw", "short slice into remainder word failed");
660+
}
661+
561662
#[test]
562663
fn test_span_into_bytearray() {
563664
let empty_ba: ByteArray = "";
@@ -570,5 +671,29 @@ fn test_span_into_bytearray() {
570671
// Data word and remainder.
571672
let large_ba: ByteArray = "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVW"; // 40 bytes
572673
assert_eq!(large_ba.span().into(), large_ba);
573-
// TODO(giladchase): test with slice.
674+
675+
// Test sliced span with offset.
676+
let sliced: ByteArray = large_ba.span().slice(10, 25).unwrap().into();
677+
assert_eq!(sliced, ":;<=>?@ABCDEFGHIJKLMNOPQR");
678+
}
679+
680+
// Checks that repeatedly slicing an already-sliced span yields the expected bytes.
#[test]
681+
fn test_span_multiple_start_offset_slicing() {
682+
// Exercises lazy start-offset trimming: each slice below starts 1 byte into the previous one.
683+
// Multiple slicing operations on the remainder word should be more efficient.
684+
let ba: ByteArray = "abcdef"; // 6 bytes, shorter than one 31-byte word
685+
let span = ba.span();
686+
687+
// Each slice drops one more leading byte and keeps the rest.
let slice1 = span.slice(1, 5).unwrap();
688+
let slice2 = slice1.slice(1, 4).unwrap();
689+
let slice3 = slice2.slice(1, 3).unwrap();
690+
691+
// Convert each slice to a ByteArray to verify its contents.
692+
let result1: ByteArray = slice1.into();
693+
assert_eq!(result1, "bcdef", "first slice");
694+
let result2: ByteArray = slice2.into();
695+
assert_eq!(result2, "cdef", "second slice");
696+
let result3: ByteArray = slice3.into();
697+
assert_eq!(result3, "def", "third slice");
574698
}
699+

0 commit comments

Comments
 (0)