|
19 | 19 |
|
20 | 20 | #pragma once |
21 | 21 |
|
22 | | -#include <algorithm> |
23 | 22 | #include <cstdint> |
24 | 23 | #include <cstring> |
25 | 24 | #include <type_traits> |
@@ -249,110 +248,36 @@ inline bool BitWriter::PutAligned(T val, int num_bytes) { |
249 | 248 | return true; |
250 | 249 | } |
251 | 250 |
|
252 | | -namespace detail { |
253 | | - |
254 | | -template <typename T> |
255 | | -inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer, |
256 | | - int* bit_offset, int* byte_offset, uint64_t* buffered_values) { |
257 | | -#ifdef _MSC_VER |
258 | | -# pragma warning(push) |
259 | | -# pragma warning(disable : 4800) |
260 | | -#endif |
261 | | - *v = static_cast<T>(bit_util::TrailingBits(*buffered_values, *bit_offset + num_bits) >> |
262 | | - *bit_offset); |
263 | | -#ifdef _MSC_VER |
264 | | -# pragma warning(pop) |
265 | | -#endif |
266 | | - *bit_offset += num_bits; |
267 | | - if (*bit_offset >= 64) { |
268 | | - *byte_offset += 8; |
269 | | - *bit_offset -= 64; |
270 | | - |
271 | | - *buffered_values = |
272 | | - detail::ReadLittleEndianWord(buffer + *byte_offset, max_bytes - *byte_offset); |
273 | | -#ifdef _MSC_VER |
274 | | -# pragma warning(push) |
275 | | -# pragma warning(disable : 4800 4805) |
276 | | -#endif |
277 | | - // Read bits of v that crossed into new buffered_values_ |
278 | | - if (ARROW_PREDICT_TRUE(num_bits - *bit_offset < static_cast<int>(8 * sizeof(T)))) { |
279 | | - // if shift exponent(num_bits - *bit_offset) is not less than sizeof(T), *v will not |
280 | | - // change and the following code may cause a runtime error that the shift exponent |
281 | | - // is too large |
282 | | - *v = *v | static_cast<T>(bit_util::TrailingBits(*buffered_values, *bit_offset) |
283 | | - << (num_bits - *bit_offset)); |
284 | | - } |
285 | | -#ifdef _MSC_VER |
286 | | -# pragma warning(pop) |
287 | | -#endif |
288 | | - ARROW_DCHECK_LE(*bit_offset, 64); |
289 | | - } |
290 | | -} |
291 | | - |
292 | | -} // namespace detail |
293 | | - |
294 | 251 | template <typename T> |
295 | 252 | inline bool BitReader::GetValue(int num_bits, T* v) { |
296 | 253 | return GetBatch(num_bits, v, 1) == 1; |
297 | 254 | } |
298 | 255 |
|
299 | | -namespace internal_bit_reader { |
300 | | -template <typename T> |
301 | | -struct unpack_detect { |
302 | | - using type = std::make_unsigned_t<T>; |
303 | | -}; |
304 | | - |
305 | | -template <> |
306 | | -struct unpack_detect<bool> { |
307 | | - using type = bool; |
308 | | -}; |
309 | | -} // namespace internal_bit_reader |
310 | | - |
311 | 256 | template <typename T> |
312 | 257 | inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) { |
313 | | - ARROW_DCHECK(buffer_ != NULL); |
314 | | - ARROW_DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)) << "num_bits: " << num_bits; |
| 258 | + constexpr uint64_t kBitsPerByte = 8; |
315 | 259 |
|
316 | | - int bit_offset = bit_offset_; |
317 | | - int byte_offset = byte_offset_; |
318 | | - uint64_t buffered_values = buffered_values_; |
319 | | - int max_bytes = max_bytes_; |
320 | | - const uint8_t* buffer = buffer_; |
| 260 | + ARROW_DCHECK(buffer_ != NULLPTR); |
| 261 | + ARROW_DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)) << "num_bits: " << num_bits; |
321 | 262 |
|
322 | 263 | const int64_t needed_bits = num_bits * static_cast<int64_t>(batch_size); |
323 | | - constexpr uint64_t kBitsPerByte = 8; |
324 | 264 | const int64_t remaining_bits = |
325 | | - static_cast<int64_t>(max_bytes - byte_offset) * kBitsPerByte - bit_offset; |
| 265 | + static_cast<int64_t>(max_bytes_ - byte_offset_) * kBitsPerByte - bit_offset_; |
326 | 266 | if (remaining_bits < needed_bits) { |
327 | 267 | batch_size = static_cast<int>(remaining_bits / num_bits); |
328 | 268 | } |
329 | 269 |
|
330 | | - int i = 0; |
331 | | - if (ARROW_PREDICT_FALSE(bit_offset != 0)) { |
332 | | - for (; i < batch_size && bit_offset != 0; ++i) { |
333 | | - detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, |
334 | | - &buffered_values); |
335 | | - } |
336 | | - } |
337 | | - |
338 | | - using unpack_t = typename internal_bit_reader::unpack_detect<T>::type; |
339 | | - |
340 | | - int num_unpacked = ::arrow::internal::unpack( |
341 | | - buffer + byte_offset, reinterpret_cast<unpack_t*>(v + i), batch_size - i, num_bits); |
342 | | - i += num_unpacked; |
343 | | - byte_offset += num_unpacked * num_bits / 8; |
344 | | - |
345 | | - buffered_values = |
346 | | - detail::ReadLittleEndianWord(buffer + byte_offset, max_bytes - byte_offset); |
| 270 | + if constexpr (std::is_same_v<T, bool>) { |
| 271 | + ::arrow::internal::unpack(buffer_ + byte_offset_, v, batch_size, num_bits, |
| 272 | + bit_offset_); |
347 | 273 |
|
348 | | - for (; i < batch_size; ++i) { |
349 | | - detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset, |
350 | | - &buffered_values); |
| 274 | + } else { |
| 275 | + ::arrow::internal::unpack(buffer_ + byte_offset_, |
| 276 | + reinterpret_cast<std::make_unsigned_t<T>*>(v), batch_size, |
| 277 | + num_bits, bit_offset_); |
351 | 278 | } |
352 | 279 |
|
353 | | - bit_offset_ = bit_offset; |
354 | | - byte_offset_ = byte_offset; |
355 | | - buffered_values_ = buffered_values; |
| 280 | + Advance(batch_size * num_bits); |
356 | 281 |
|
357 | 282 | return batch_size; |
358 | 283 | } |
|
0 commit comments