diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index a8ae1a765414..d7ed39bbe249 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -17,7 +17,8 @@ //! Utils for working with packed bit masks -use crate::bit_util::ceil; +use crate::bit_chunk_iterator::BitChunks; +use crate::bit_util::{apply_bitwise_binary_op, ceil}; /// Util function to set bits in a slice of bytes. /// @@ -32,28 +33,20 @@ pub fn set_bits( offset_read: usize, len: usize, ) -> usize { - assert!(offset_write + len <= write_data.len() * 8); - assert!(offset_read + len <= data.len() * 8); - let mut null_count = 0; - let mut acc = 0; - while len > acc { - // SAFETY: the arguments to `set_upto_64bits` are within the valid range because - // (offset_write + acc) + (len - acc) == offset_write + len <= write_data.len() * 8 - // (offset_read + acc) + (len - acc) == offset_read + len <= data.len() * 8 - let (n, len_set) = unsafe { - set_upto_64bits( - write_data, - data, - offset_write + acc, - offset_read + acc, - len - acc, - ) - }; - null_count += n; - acc += len_set; - } - - null_count + apply_bitwise_binary_op( + write_data, + offset_write, + data, + offset_read, + len, + |_a, b| b, // copy bits from `data` + ); + + // TODO move this into a function in bit_util (and refactor BooleanArray to use it) + // count zero bits in data[offset_read..offset_read+len] + let chunks = BitChunks::new(data, offset_read, len); + let num_ones: usize = chunks.iter_padded().map(|a| a.count_ones() as usize).sum(); + len - num_ones } /// Similar to `set_bits` but sets only upto 64 bits, actual number of bits set may vary. 
diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs index 67c72fc08906..d544465ed925 100644 --- a/arrow-buffer/src/util/bit_util.rs +++ b/arrow-buffer/src/util/bit_util.rs @@ -226,7 +226,7 @@ pub fn apply_bitwise_binary_op( let right_byte_offset = right_offset_in_bits / 8; // Read the same amount of bits from the right buffer - let right_first_byte: u8 = crate::util::bit_util::read_up_to_byte_from_offset( + let right_first_byte: u8 = read_up_to_byte_from_offset( &right.as_ref()[right_byte_offset..], bits_to_next_byte, // Right bit offset diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 408dfbaac909..620d39a082e7 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -86,8 +86,6 @@ unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { } /// Aligns the provided `nulls` to the provided `data_offset` -/// -/// This is a temporary measure until offset is removed from ArrayData (#1799) fn align_nulls(data_offset: usize, nulls: Option<&NullBuffer>) -> Option { let nulls = nulls?; if data_offset == nulls.offset() {