From 91fcff1fa1a5c5ee765dd6711daf14ddbe56fec5 Mon Sep 17 00:00:00 2001 From: Qiwei Huang Date: Mon, 20 Oct 2025 22:44:40 +0800 Subject: [PATCH 1/3] Using MAX_DISCRIMINANT as the enum count --- parquet/src/basic.rs | 4 +++- parquet/src/column/reader/decoder.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 7f50eada46de..ca82880b7547 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -678,6 +678,8 @@ enum Encoding { /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may /// perform poorly for large values of N. BYTE_STREAM_SPLIT = 9; + SOME_ENCODING = 10; + SOME_OTHER11=11; } ); @@ -741,7 +743,7 @@ pub struct EncodingMask(i32); impl EncodingMask { /// Highest valued discriminant in the [`Encoding`] enum - const MAX_ENCODING: i32 = Encoding::BYTE_STREAM_SPLIT as i32; + const MAX_ENCODING: i32 = Encoding::MAX_DISCRIMINANT; /// A mask consisting of unused bit positions, used for validation. This includes the never /// used GROUP_VAR_INT encoding value of `1`. const ALLOWED_MASK: u32 = diff --git a/parquet/src/column/reader/decoder.rs b/parquet/src/column/reader/decoder.rs index 1d4e2f751181..e49906207577 100644 --- a/parquet/src/column/reader/decoder.rs +++ b/parquet/src/column/reader/decoder.rs @@ -138,7 +138,7 @@ pub trait ColumnValueDecoder { /// /// This replaces `HashMap` lookups with direct indexing to avoid hashing overhead in the /// hot decoding paths. -const ENCODING_SLOTS: usize = Encoding::BYTE_STREAM_SPLIT as usize + 1; +const ENCODING_SLOTS: usize = Encoding::MAX_DISCRIMINANT as usize + 1; /// An implementation of [`ColumnValueDecoder`] for `[T::T]` pub struct ColumnValueDecoderImpl { From 7986d7147535f5aab4e8524eb5ec2e9afef30654 Mon Sep 17 00:00:00 2001 From: Qiwei Huang Date: Mon, 20 Oct 2025 22:47:34 +0800 Subject: [PATCH 2/3] Update the Macro --- parquet/src/parquet_macros.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index eb8bc2b7f07a..d7f865de040e 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -79,6 +79,35 @@ macro_rules! thrift_enum { Ok(field_id) } } + + impl $identifier { + #[allow(deprecated)] + #[doc = "Returns a slice containing every variant of this enum."] + #[allow(dead_code)] + pub const VARIANTS: &'static [Self] = &[ + $(Self::$field_name),* + ]; + + #[allow(deprecated)] + const fn max_discriminant_impl() -> i32 { + let values: &[i32] = &[$($field_value),*]; + let mut max = values[0]; + let mut idx = 1; + while idx < values.len() { + let candidate = values[idx]; + if candidate > max { + max = candidate; + } + idx += 1; + } + max + } + + #[allow(deprecated)] + #[doc = "Returns the largest discriminant value defined for this enum."] + #[allow(dead_code)] + pub const MAX_DISCRIMINANT: i32 = Self::max_discriminant_impl(); + } } } From 8b39d268d1e4a6eb1dde3fe8d25a8181bc26f345 Mon Sep 17 00:00:00 2001 From: Qiwei Huang Date: Mon, 20 Oct 2025 22:53:38 +0800 Subject: [PATCH 3/3] Remove the newly added encodings --- parquet/src/basic.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index ca82880b7547..def69f251581 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -678,8 +678,6 @@ enum Encoding { /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may /// perform poorly for large values of N. BYTE_STREAM_SPLIT = 9; - SOME_ENCODING = 10; - SOME_OTHER11=11; } );