Skip to content

Commit 471439b

Browse files
author
Gilad Chase
committed
feat(byte_array): add slice
1. When a slice ends before a word boundary (its last word isn't a full 31 bytes long), its last word is copied into the remainder word. Rationale: this is consistent with `ByteArray`'s `pending word`, and allows slices of full bytes31 that include an end_suffix to be shifted-right without allocating a new array. 2. When slices include a start-offset, the offset is applied lazily, only upon conversion into a `ByteArray`; otherwise it's only recorded in the `first_char_start_offset` field.
1 parent b769bea commit 471439b

File tree

2 files changed

+205
-8
lines changed

2 files changed

+205
-8
lines changed

corelib/src/byte_array.cairo

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ use crate::cmp::min;
5555
use crate::integer::{U32TryIntoNonZero, u128_safe_divmod};
5656
#[feature("bounded-int-utils")]
5757
use crate::internal::bounded_int::{BoundedInt, downcast, upcast};
58+
use crate::num::traits::CheckedAdd;
5859
#[allow(unused_imports)]
5960
use crate::serde::Serde;
6061
use crate::traits::{Into, TryInto};
@@ -690,6 +691,38 @@ pub impl ByteSpanImpl of ByteSpanTrait {
690691
ba.append_aligned_byte_span(self);
691692
ba
692693
}
694+
695+
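/// Returns a slice of `len` bytes of the ByteSpan starting at byte `start`, or `None` if `start + len` overflows or exceeds the span's length; a zero `len` always yields the default (empty) span.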
696+
fn slice(self: @ByteSpan, start: usize, len: usize) -> Option<ByteSpan> {
697+
if len == 0 {
698+
return Some(Default::default());
699+
}
700+
if start.checked_add(len)? > self.len() {
701+
return None;
702+
}
703+
704+
let abs_start = start + upcast(*self.first_char_start_offset);
705+
let (start_word, start_offset) = DivRem::div_rem(abs_start, BYTES_IN_BYTES31_NONZERO);
706+
let (end_word, end_offset) = DivRem::div_rem(abs_start + len, BYTES_IN_BYTES31_NONZERO);
707+
let data_len = self.data.len();
708+
709+
let remainder_with_end_offset_trimmed = if end_word < data_len {
710+
let word = (*self.data[end_word]).into();
711+
shift_right(word, BYTES_IN_BYTES31, BYTES_IN_BYTES31 - end_offset)
712+
} else {
713+
let remainder_len = upcast(*self.remainder_len);
714+
shift_right(*self.remainder_word, remainder_len, remainder_len - end_offset)
715+
};
716+
717+
return Some(
718+
ByteSpan {
719+
data: self.data.slice(start_word, min(end_word, data_len) - start_word),
720+
first_char_start_offset: downcast(start_offset).unwrap(),
721+
remainder_word: remainder_with_end_offset_trimmed,
722+
remainder_len: downcast(end_offset).unwrap(),
723+
},
724+
);
725+
}
693726
}
694727

695728
impl ByteSpanDefault of Default<ByteSpan> {
@@ -723,6 +756,21 @@ impl ByteSpanToByteSpan of ToByteSpanTrait<ByteSpan> {
723756
}
724757
}
725758

759+
/// Shifts a word right by `n_bytes`.
760+
/// The input `bytes31` and the output `bytes31` are represented using `felt252`s to improve
761+
/// performance.
762+
///
763+
/// Note: this function assumes that:
764+
/// 1. `word` is validly convertible to a `bytes31` which has no more than `word_len` bytes of data.
765+
/// 2. `n_bytes <= word_len`.
766+
/// 3. `word_len <= BYTES_IN_BYTES31`.
767+
/// If these assumptions are not met, it can corrupt the result. Thus, this should be a
768+
/// private function. We could add masking/assertions but it would be more expensive.
769+
fn shift_right(word: felt252, word_len: usize, n_bytes: usize) -> felt252 {
770+
let (_shifted_out, after_shift_right) = split_bytes31(word, word_len, n_bytes);
771+
after_shift_right
772+
}
773+
726774
mod helpers {
727775
use core::num::traits::Bounded;
728776
use crate::bytes_31::BYTES_IN_BYTES31;

corelib/src/test/byte_array_test.cairo

Lines changed: 157 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::byte_array::{ByteSpanTrait, ToByteSpanTrait};
2+
use crate::num::traits::Bounded;
23
use crate::test::test_utils::{assert_eq, assert_ne};
34

45
#[test]
@@ -507,7 +508,6 @@ fn test_from_collect() {
507508
assert_eq!(ba, "hello");
508509
}
509510

510-
// TODO(giladchase): add dedicated is_empty test once we have `slice`.
511511
#[test]
512512
fn test_span_len() {
513513
// Test simple happy flow --- value is included in the last word.
@@ -517,28 +517,60 @@ fn test_span_len() {
517517
assert_eq!(span.len(), 1);
518518
assert!(!span.is_empty());
519519

520+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
521+
let span = ba_31.span();
522+
assert_eq!(span.len(), 31, "wrong span len");
523+
assert!(!span.is_empty());
524+
520525
// Test empty.
521526
let empty_ba: ByteArray = "";
522527
let empty_span = empty_ba.span();
523528
assert_eq!(empty_span.len(), 0);
524529
assert!(empty_span.is_empty());
525530

526-
// TODO(giladchase): Add start-offset using slice once supported.
527531
// First word in the array, second in last word.
528532
let two_byte31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefg";
529-
let mut single_span = two_byte31.span();
530-
assert_eq!(single_span.len(), 33, "len error with start offset");
533+
let mut single_span = two_byte31.span().slice(1, 32).unwrap();
534+
assert_eq!(single_span.len(), 32, "len error with start offset");
531535
assert!(!single_span.is_empty());
532536

533-
// TODO(giladchase): Add start-offset using slice once supported.
534537
// First word in the array, second in the array, third in last word.
535538
let three_bytes31: ByteArray =
536539
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 chars.
537-
let mut three_span = three_bytes31.span();
538-
assert_eq!(three_span.len(), 64, "len error with size-3 bytearray");
540+
let mut three_span = three_bytes31.span().slice(1, 63).unwrap();
541+
assert_eq!(three_span.len(), 63, "len error with size-3 bytearray");
539542
assert!(!three_span.is_empty());
540543
}
541544

545+
#[test]
546+
fn test_span_slice_is_empty() {
547+
let ba: ByteArray = "hello";
548+
let span = ba.span();
549+
550+
let empty = span.slice(2, 0).unwrap();
551+
assert_eq!(empty.len(), 0);
552+
assert!(empty.is_empty());
553+
554+
let empty_string: ByteArray = "";
555+
assert_eq!(empty_string, empty.to_byte_array());
556+
557+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde";
558+
let span = ba_31.span();
559+
assert!(span.slice(30, 0).unwrap().is_empty());
560+
assert!(span.slice(31, 0).unwrap().is_empty());
561+
assert!(!span.slice(30, 1).unwrap().is_empty());
562+
assert!(!span.slice(15, 16).unwrap().is_empty());
563+
assert!(!span.slice(16, 15).unwrap().is_empty());
564+
565+
let ba_30: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcd";
566+
let span = ba_30.span();
567+
assert!(span.slice(29, 0).unwrap().is_empty());
568+
assert!(span.slice(30, 0).unwrap().is_empty());
569+
assert!(!span.slice(29, 1).unwrap().is_empty());
570+
assert!(!span.slice(14, 15).unwrap().is_empty());
571+
assert!(!span.slice(15, 14).unwrap().is_empty());
572+
}
573+
542574
#[test]
543575
fn test_span_copy() {
544576
let ba: ByteArray = "12";
@@ -559,6 +591,100 @@ fn test_span_copy() {
559591
assert_eq!(ba, span.to_byte_array());
560592
}
561593

594+
#[test]
595+
fn test_span_slice_empty() {
596+
let ba: ByteArray = "hello";
597+
let span = ba.span();
598+
599+
let empty = span.slice(2, 0).unwrap();
600+
assert_eq!(empty.len(), 0);
601+
assert!(empty.is_empty());
602+
603+
let empty_string: ByteArray = "";
604+
assert_eq!(empty_string, empty.to_byte_array());
605+
}
606+
607+
// TODO(giladchase): replace assert+is_none with assert_eq when we have PartialEq.
608+
#[test]
609+
fn test_span_slice_out_of_bounds() {
610+
let ba: ByteArray = "hello";
611+
let span = ba.span();
612+
613+
assert!(span.slice(3, 5).is_none(), "end out of bounds");
614+
assert!(span.slice(6, 1).is_none(), "start out of bounds");
615+
616+
assert!(
617+
span.slice(1, 3).unwrap().slice(Bounded::<usize>::MAX, 1).is_none(),
618+
"start offset overflow",
619+
);
620+
assert!(span.slice(Bounded::<usize>::MAX, 1).is_none());
621+
assert!(span.slice(1, Bounded::<usize>::MAX).is_none());
622+
623+
let empty_string: ByteArray = "";
624+
assert!(empty_string.span().slice(0, 2).is_none(), "empty slice is sliceable");
625+
}
626+
627+
#[test]
628+
fn test_span_slice_under_31_bytes() {
629+
// All bytes reside in the remainder word.
630+
let ba: ByteArray = "abcde";
631+
let span = ba.span();
632+
633+
let mut slice: ByteArray = span.slice(0, 3).unwrap().to_byte_array();
634+
assert_eq!(slice, "abc", "first 3 bytes");
635+
636+
slice = span.slice(2, 2).unwrap().to_byte_array();
637+
assert_eq!(slice, "cd", "middle 2 bytes");
638+
639+
slice = span.slice(4, 1).unwrap().to_byte_array();
640+
assert_eq!(slice, "e", "last byte");
641+
}
642+
#[test]
643+
fn test_span_slice_exactly_31_bytes() {
644+
// 1 full data word, empty last_word.
645+
let ba_31: ByteArray = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcde"; // 31 bytes
646+
let span31 = ba_31.span();
647+
assert_eq!(span31.len(), 31);
648+
649+
let ba: ByteArray = span31.slice(0, 31).unwrap().to_byte_array();
650+
assert_eq!(ba, ba_31);
651+
652+
// Sanity-check the bytes at positions 10 and 11.
653+
assert_eq!(ba_31.at(10), Some('K'));
654+
assert_eq!(ba_31.at(11), Some('L'));
655+
656+
// Partial slice
657+
let ba: ByteArray = span31.slice(10, 10).unwrap().to_byte_array();
658+
assert_eq!(ba, "KLMNOPQRST", "middle 10 bytes");
659+
}
660+
661+
#[test]
662+
fn test_span_slice_positions() {
663+
// Two full bytes31 + remainder with 2 bytes.
664+
let ba: ByteArray =
665+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 bytes
666+
let span = ba.span();
667+
668+
// Slice from middle of first word to middle of second word.
669+
let short_slice_across_data_words = span.slice(10, 30).unwrap();
670+
let mut ba_from_span: ByteArray = short_slice_across_data_words.to_byte_array();
671+
assert_eq!(ba_from_span, "KLMNOPQRSTUVWXYZabcdefghijklmn", "multi-word short slice failed");
672+
673+
// Slice spanning 3 words (two data and remainder).
674+
let long_slice_across_data_words = span.slice(5, 59).unwrap();
675+
ba_from_span = long_slice_across_data_words.to_byte_array();
676+
assert_eq!(
677+
ba_from_span,
678+
"FGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$",
679+
"multi-word long slice failed",
680+
);
681+
682+
// Slice from second word into remainder.
683+
let short_slice_into_remainder_word = span.slice(29, 20).unwrap();
684+
ba_from_span = short_slice_into_remainder_word.to_byte_array();
685+
assert_eq!(ba_from_span, "defghijklmnopqrstuvw", "short slice into remainder word failed");
686+
}
687+
562688
#[test]
563689
fn test_span_to_bytearray() {
564690
let empty_ba: ByteArray = "";
@@ -576,5 +702,28 @@ fn test_span_to_bytearray() {
576702
let even_larger_ba: ByteArray =
577703
"abcdeFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#$"; // 64 bytes
578704
assert_eq!(even_larger_ba.span().to_byte_array(), even_larger_ba);
579-
// TODO(giladchase): test with slice.
705+
706+
// Test sliced span with offset.
707+
let sliced: ByteArray = large_ba.span().slice(10, 25).unwrap().to_byte_array();
708+
assert_eq!(sliced, ":;<=>?@ABCDEFGHIJKLMNOPQR");
709+
}
710+
711+
#[test]
712+
fn test_span_multiple_start_offset_slicing() {
713+
// Test to demonstrate the optimization of lazy start-offset trimming.
714+
// Multiple slicing operations on remainder word should be more efficient.
715+
let ba: ByteArray = "abcdef"; // 6 bytes in remainder
716+
let span = ba.span();
717+
718+
let slice1 = span.slice(1, 5).unwrap();
719+
let slice2 = slice1.slice(1, 4).unwrap();
720+
let slice3 = slice2.slice(1, 3).unwrap();
721+
722+
// Convert to ByteArray to verify correctness
723+
let result1: ByteArray = slice1.to_byte_array();
724+
assert_eq!(result1, "bcdef", "first slice");
725+
let result2: ByteArray = slice2.to_byte_array();
726+
assert_eq!(result2, "cdef", "second slice");
727+
let result3: ByteArray = slice3.to_byte_array();
728+
assert_eq!(result3, "def", "third slice");
580729
}

0 commit comments

Comments
 (0)