Skip to content
2 changes: 1 addition & 1 deletion bitpacker/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ mod tests {
#[inline(never)]
fn create_bitpacked_data(bit_width: u8, num_els: u32) -> Vec<u8> {
let mut bitpacker = BitPacker::new();
let mut buffer = Vec::new();
let mut buffer = vec![];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`Vec::new()` is preferred for empty `Vec`s.

for _ in 0..num_els {
// the values do not matter.
bitpacker.write(0u64, bit_width, &mut buffer).unwrap();
Expand Down
33 changes: 13 additions & 20 deletions bitpacker/src/bitpacker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,15 @@ use std::ops::{Range, RangeInclusive};

use bitpacking::{BitPacker as ExternalBitPackerTrait, BitPacker1x};

#[derive(Default)]
pub struct BitPacker {
mini_buffer: u64,
mini_buffer_written: usize,
}

impl Default for BitPacker {
fn default() -> Self {
BitPacker::new()
}
}
impl BitPacker {
pub fn new() -> BitPacker {
BitPacker {
mini_buffer: 0u64,
mini_buffer_written: 0,
}
pub fn new() -> Self {
Self::default()
}

#[inline]
Expand Down Expand Up @@ -48,7 +41,7 @@ impl BitPacker {

pub fn flush<TWrite: io::Write + ?Sized>(&mut self, output: &mut TWrite) -> io::Result<()> {
if self.mini_buffer_written > 0 {
let num_bytes = (self.mini_buffer_written + 7) / 8;
let num_bytes = self.mini_buffer_written.div_ceil(8);
let bytes = self.mini_buffer.to_le_bytes();
output.write_all(&bytes[..num_bytes])?;
self.mini_buffer_written = 0;
Expand All @@ -75,14 +68,14 @@ impl BitUnpacker {
/// The bitunpacker works by doing an unaligned read of 8 bytes.
/// For this reason, values of `num_bits` in the range
/// `57..=63` (inclusive) are forbidden.
pub fn new(num_bits: u8) -> BitUnpacker {
pub fn new(num_bits: u8) -> Self {
assert!(num_bits <= 7 * 8 || num_bits == 64);
let mask: u64 = if num_bits == 64 {
!0u64
} else {
(1u64 << num_bits) - 1u64
};
BitUnpacker {
Self {
num_bits: usize::from(num_bits),
mask,
}
Expand Down Expand Up @@ -138,7 +131,7 @@ impl BitUnpacker {

// We use `usize` here to avoid overflow issues.
let end_bit_read = (end_idx as usize) * self.num_bits;
let end_byte_read = (end_bit_read + 7) / 8;
let end_byte_read = end_bit_read.div_ceil(8);
assert!(
end_byte_read <= data.len(),
"Requested index is out of bounds."
Expand Down Expand Up @@ -248,7 +241,7 @@ mod test {
use super::{BitPacker, BitUnpacker};

fn create_bitpacker(len: usize, num_bits: u8) -> (BitUnpacker, Vec<u64>, Vec<u8>) {
let mut data = Vec::new();
let mut data = vec![];
let mut bitpacker = BitPacker::new();
let max_val: u64 = (1u64 << num_bits as u64) - 1u64;
let vals: Vec<u64> = (0u64..len as u64)
Expand All @@ -258,7 +251,7 @@ mod test {
bitpacker.write(val, num_bits, &mut data).unwrap();
}
bitpacker.close(&mut data).unwrap();
assert_eq!(data.len(), ((num_bits as usize) * len + 7) / 8);
assert_eq!(data.len(), ((num_bits as usize) * len).div_ceil(8));
let bitunpacker = BitUnpacker::new(num_bits);
(bitunpacker, vals, data)
}
Expand Down Expand Up @@ -298,13 +291,13 @@ mod test {
}

fn test_bitpacker_aux(num_bits: u8, vals: &[u64]) {
let mut buffer: Vec<u8> = Vec::new();
let mut buffer: Vec<u8> = vec![];
let mut bitpacker = BitPacker::new();
for &val in vals {
bitpacker.write(val, num_bits, &mut buffer).unwrap();
}
bitpacker.flush(&mut buffer).unwrap();
assert_eq!(buffer.len(), (vals.len() * num_bits as usize + 7) / 8);
assert_eq!(buffer.len(), (vals.len() * num_bits as usize).div_ceil(8));
let bitunpacker = BitUnpacker::new(num_bits);
let max_val = if num_bits == 64 {
u64::MAX
Expand Down Expand Up @@ -357,14 +350,14 @@ mod test {
} else {
(1u32 << num_bits) - 1
};
let mut buffer: Vec<u8> = Vec::new();
let mut buffer: Vec<u8> = vec![];
let mut bitpacker = BitPacker::new();
for val in 0..100 {
bitpacker.write(val & mask as u64, num_bits, &mut buffer).unwrap();
}
bitpacker.flush(&mut buffer).unwrap();
let bitunpacker = BitUnpacker::new(num_bits);
let mut output: Vec<u32> = Vec::new();
let mut output: Vec<u32> = vec![];
for len in [0, 1, 2, 32, 33, 34, 64] {
for start_idx in 0u32..32u32 {
output.resize(len, 0);
Expand Down
10 changes: 5 additions & 5 deletions bitpacker/src/blocked_bitpacker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub struct BlockedBitpacker {
}
impl Default for BlockedBitpacker {
fn default() -> Self {
BlockedBitpacker::new()
Self::new()
}
}

Expand Down Expand Up @@ -73,7 +73,7 @@ impl BlockedBitpacker {

/// The memory used (including children)
pub fn mem_usage(&self) -> usize {
std::mem::size_of::<BlockedBitpacker>()
std::mem::size_of::<Self>()
+ self.compressed_blocks.capacity()
+ mem_usage(&self.offset_and_bits)
+ mem_usage(&self.buffer)
Expand Down Expand Up @@ -140,10 +140,10 @@ impl BlockedBitpacker {
pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
// todo performance: we could decompress a whole block and cache it instead
let bitpacked_elems = self.offset_and_bits.len() * BLOCK_SIZE;
let iter = (0..bitpacked_elems)

(0..bitpacked_elems)
.map(move |idx| self.get(idx))
.chain(self.buffer.iter().cloned());
iter
.chain(self.buffer.iter().cloned())
}
}

Expand Down
16 changes: 7 additions & 9 deletions bitpacker/src/filter_vec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ impl FilterImplPerInstructionSet {
pub fn is_available(&self) -> bool {
match *self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => is_x86_feature_detected!("avx2"),
FilterImplPerInstructionSet::Scalar => true,
Self::AVX2 => is_x86_feature_detected!("avx2"),
Self::Scalar => true,
}
}
}
Expand All @@ -37,22 +37,20 @@ const IMPLS: [FilterImplPerInstructionSet; 1] = [FilterImplPerInstructionSet::Sc
impl FilterImplPerInstructionSet {
#[inline]
#[allow(unused_variables)] // on non-x86_64, code is unused.
fn from(code: u8) -> FilterImplPerInstructionSet {
fn from(code: u8) -> Self {
#[cfg(target_arch = "x86_64")]
if code == FilterImplPerInstructionSet::AVX2 as u8 {
return FilterImplPerInstructionSet::AVX2;
return Self::AVX2;
}
FilterImplPerInstructionSet::Scalar
Self::Scalar
}

#[inline]
fn filter_vec_in_place(self, range: RangeInclusive<u32>, offset: u32, output: &mut Vec<u32>) {
match self {
#[cfg(target_arch = "x86_64")]
FilterImplPerInstructionSet::AVX2 => avx2::filter_vec_in_place(range, offset, output),
FilterImplPerInstructionSet::Scalar => {
scalar::filter_vec_in_place(range, offset, output)
}
Self::AVX2 => avx2::filter_vec_in_place(range, offset, output),
Self::Scalar => scalar::filter_vec_in_place(range, offset, output),
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion columnar/benches/bench_access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub fn generate_columnar_and_open(card: Card, num_docs: u32) -> Column {
}

fn main() {
let mut inputs = Vec::new();
let mut inputs = vec![];

let mut add_card = |card1: Card| {
inputs.push((
Expand Down
2 changes: 1 addition & 1 deletion columnar/benches/bench_first_vals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fn get_test_columns() -> Columns {
dataframe_writer.record_numerical(idx as u32, "multi_values", NumericalValue::U64(*val));
dataframe_writer.record_numerical(idx as u32, "multi_values", NumericalValue::U64(*val));
}
let mut buffer: Vec<u8> = Vec::new();
let mut buffer: Vec<u8> = vec![];
dataframe_writer
.serialize(data.len() as u32, &mut buffer)
.unwrap();
Expand Down
4 changes: 2 additions & 2 deletions columnar/benches/bench_merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use tantivy_columnar::*;
const NUM_DOCS: u32 = 100_000;

fn main() {
let mut inputs = Vec::new();
let mut inputs = vec![];

let mut add_combo = |card1: Card, card2: Card| {
inputs.push((
Expand Down Expand Up @@ -36,7 +36,7 @@ fn main() {
input_name,
columnar_readers,
move |columnar_readers: &Vec<ColumnarReader>| {
let mut out = Vec::new();
let mut out = vec![];
let columnar_readers = columnar_readers.iter().collect::<Vec<_>>();
let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);

Expand Down
10 changes: 5 additions & 5 deletions columnar/benches/bench_values_u128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ fn get_data_50percent_item() -> Vec<u128> {
}
data.push(SINGLE_ITEM);
data.shuffle(&mut rng);
let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
data

data.iter().map(|el| *el as u128).collect::<Vec<_>>()
}

#[bench]
Expand All @@ -57,7 +57,7 @@ fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) {
let column = get_u128_column_from_data(&data);

b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(
*FIFTY_PERCENT_RANGE.start() as u128..=*FIFTY_PERCENT_RANGE.end() as u128,
0..data.len() as u32,
Expand All @@ -73,7 +73,7 @@ fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) {
let column = get_u128_column_from_data(&data);

b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(
*SINGLE_ITEM_RANGE.start() as u128..=*SINGLE_ITEM_RANGE.end() as u128,
0..data.len() as u32,
Expand All @@ -89,7 +89,7 @@ fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) {
let column = get_u128_column_from_data(&data);

b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(0..=u128::MAX, 0..data.len() as u32, &mut positions);
positions
});
Expand Down
16 changes: 8 additions & 8 deletions columnar/benches/bench_values_u64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ fn get_data_50percent_item() -> Vec<u128> {
data.push(SINGLE_ITEM);

data.shuffle(&mut rng);
let data = data.iter().map(|el| *el as u128).collect::<Vec<_>>();
data

data.iter().map(|el| *el as u128).collect::<Vec<_>>()
}

// U64 RANGE START
Expand All @@ -80,7 +80,7 @@ fn bench_intfastfield_getrange_u64_50percent_hit(b: &mut Bencher) {
let data = data.iter().map(|el| *el as u64).collect::<Vec<_>>();
let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);
b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(
FIFTY_PERCENT_RANGE,
0..data.len() as u32,
Expand All @@ -97,7 +97,7 @@ fn bench_intfastfield_getrange_u64_1percent_hit(b: &mut Bencher) {
let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);

b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(
ONE_PERCENT_ITEM_RANGE,
0..data.len() as u32,
Expand All @@ -114,7 +114,7 @@ fn bench_intfastfield_getrange_u64_single_hit(b: &mut Bencher) {
let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);

b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(SINGLE_ITEM_RANGE, 0..data.len() as u32, &mut positions);
positions
});
Expand All @@ -127,7 +127,7 @@ fn bench_intfastfield_getrange_u64_hit_all(b: &mut Bencher) {
let column: Arc<dyn ColumnValues<u64>> = serialize_and_load(&data, CodecType::Bitpacked);

b.iter(|| {
let mut positions = Vec::new();
let mut positions = vec![];
column.get_row_ids_for_value_range(0..=u64::MAX, 0..data.len() as u32, &mut positions);
positions
});
Expand All @@ -141,7 +141,7 @@ fn bench_intfastfield_stride7_vec(b: &mut Bencher) {
b.iter(|| {
let mut a = 0u64;
for i in (0..n / 7).map(|val| val * 7) {
a += permutation[i as usize];
a += permutation[i];
}
a
});
Expand Down Expand Up @@ -196,7 +196,7 @@ fn bench_intfastfield_scan_all_vec(b: &mut Bencher) {
b.iter(|| {
let mut a = 0u64;
for i in 0..permutation.len() {
a += permutation[i as usize] as u64;
a += permutation[i] as u64;
}
a
});
Expand Down
12 changes: 6 additions & 6 deletions columnar/benches/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ pub enum Card {
impl Display for Card {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Card::MultiSparse => write!(f, "multi sparse 1/13"),
Card::Multi => write!(f, "multi 2x"),
Card::Sparse => write!(f, "sparse 1/13"),
Card::Dense => write!(f, "dense 1/12"),
Card::Full => write!(f, "full"),
Self::MultiSparse => write!(f, "multi sparse 1/13"),
Self::Multi => write!(f, "multi 2x"),
Self::Sparse => write!(f, "sparse 1/13"),
Self::Dense => write!(f, "dense 1/12"),
Self::Full => write!(f, "full"),
}
}
}
Expand Down Expand Up @@ -53,7 +53,7 @@ pub fn generate_columnar_with_name(card: Card, num_docs: u32, column_name: &str)
}
}

let mut wrt: Vec<u8> = Vec::new();
let mut wrt: Vec<u8> = vec![];
columnar_writer.serialize(num_docs, &mut wrt).unwrap();
ColumnarReader::open(wrt).unwrap()
}
2 changes: 1 addition & 1 deletion columnar/columnar-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ fn main() -> io::Result<()> {

println!("value count {total_count}");

let mut buffer = Vec::new();
let mut buffer = vec![];
let start_serialize = Instant::now();
columnar.serialize(doc, None, &mut buffer)?;
println!("Serialized in {:?}", start_serialize.elapsed());
Expand Down
10 changes: 5 additions & 5 deletions columnar/src/block_accessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ mod tests {
let docs: Vec<u32> = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
let hits: Vec<u32> = vec![2, 4, 6, 8, 10];

let mut missing_docs: Vec<u32> = Vec::new();
let mut missing_docs: Vec<u32> = vec![];

find_missing_docs(&docs, &hits, |missing_doc| {
missing_docs.push(missing_doc);
Expand All @@ -130,10 +130,10 @@ mod tests {

#[test]
fn test_find_missing_docs_empty() {
let docs: Vec<u32> = Vec::new();
let docs: Vec<u32> = vec![];
let hits: Vec<u32> = vec![2, 4, 6, 8, 10];

let mut missing_docs: Vec<u32> = Vec::new();
let mut missing_docs: Vec<u32> = vec![];

find_missing_docs(&docs, &hits, |missing_doc| {
missing_docs.push(missing_doc);
Expand All @@ -145,9 +145,9 @@ mod tests {
#[test]
fn test_find_missing_docs_all_missing() {
let docs: Vec<u32> = vec![1, 2, 3, 4, 5];
let hits: Vec<u32> = Vec::new();
let hits: Vec<u32> = vec![];

let mut missing_docs: Vec<u32> = Vec::new();
let mut missing_docs: Vec<u32> = vec![];

find_missing_docs(&docs, &hits, |missing_doc| {
missing_docs.push(missing_doc);
Expand Down
Loading