Skip to content

Commit e00f508

Browse files
committed
Revert "Upgrade arrow/parquet to 56.0.0 (apache#16690)"
This reverts commit fa1f8c1.
1 parent c2f52e6 commit e00f508

File tree

29 files changed

+419
-1217
lines changed

29 files changed

+419
-1217
lines changed

Cargo.lock

Lines changed: 104 additions & 62 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,20 +90,20 @@ ahash = { version = "0.8", default-features = false, features = [
9090
"runtime-rng",
9191
] }
9292
apache-avro = { version = "0.20", default-features = false }
93-
arrow = { version = "56.0.0", features = [
93+
arrow = { version = "55.2.0", features = [
9494
"prettyprint",
9595
"chrono-tz",
9696
] }
97-
arrow-buffer = { version = "56.0.0", default-features = false }
98-
arrow-flight = { version = "56.0.0", features = [
97+
arrow-buffer = { version = "55.2.0", default-features = false }
98+
arrow-flight = { version = "55.2.0", features = [
9999
"flight-sql-experimental",
100100
] }
101-
arrow-ipc = { version = "56.0.0", default-features = false, features = [
101+
arrow-ipc = { version = "55.2.0", default-features = false, features = [
102102
"lz4",
103103
] }
104-
arrow-ord = { version = "56.0.0", default-features = false }
105-
arrow-schema = { version = "56.0.0", default-features = false }
106-
async-trait = "0.1.89"
104+
arrow-ord = { version = "55.2.0", default-features = false }
105+
arrow-schema = { version = "55.2.0", default-features = false }
106+
async-trait = "0.1.88"
107107
bigdecimal = "0.4.8"
108108
bytes = "1.10"
109109
chrono = { version = "0.4.41", default-features = false }
@@ -157,7 +157,7 @@ itertools = "0.14"
157157
log = "^0.4"
158158
object_store = { version = "0.12.3", default-features = false }
159159
parking_lot = "0.12"
160-
parquet = { version = "56.0.0", default-features = false, features = [
160+
parquet = { version = "55.2.0", default-features = false, features = [
161161
"arrow",
162162
"async",
163163
"object_store",

datafusion-examples/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ serde_json = { workspace = true }
8181
tempfile = { workspace = true }
8282
test-utils = { path = "../test-utils" }
8383
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
84-
tonic = "0.13.1"
84+
tonic = "0.12.1"
8585
tracing = { version = "0.1" }
8686
tracing-subscriber = { version = "0.3" }
8787
url = { workspace = true }

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ log = { workspace = true }
7171
object_store = { workspace = true, optional = true }
7272
parquet = { workspace = true, optional = true, default-features = true }
7373
paste = "1.0.15"
74-
pyo3 = { version = "0.25", optional = true }
74+
pyo3 = { version = "0.24.2", optional = true }
7575
recursive = { workspace = true, optional = true }
7676
sqlparser = { workspace = true }
7777
tokio = { workspace = true }

datafusion/common/src/config.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,13 @@ config_namespace! {
602602
/// default parquet writer setting
603603
pub statistics_enabled: Option<String>, transform = str::to_lowercase, default = Some("page".into())
604604

605+
/// (writing) Sets max statistics size for any column. If NULL, uses
606+
/// default parquet writer setting
607+
/// max_statistics_size is deprecated, currently it is not being used
608+
// TODO: remove once deprecated
609+
#[deprecated(since = "45.0.0", note = "Setting does not do anything")]
610+
pub max_statistics_size: Option<usize>, default = Some(4096)
611+
605612
/// (writing) Target maximum number of rows in each row group (defaults to 1M
606613
/// rows). Writing larger row groups requires more memory to write, but
607614
/// can get better compression and be faster to read.
@@ -613,9 +620,9 @@ config_namespace! {
613620
/// (writing) Sets column index truncate length
614621
pub column_index_truncate_length: Option<usize>, default = Some(64)
615622

616-
/// (writing) Sets statistics truncate length. If NULL, uses
623+
/// (writing) Sets statictics truncate length. If NULL, uses
617624
/// default parquet writer setting
618-
pub statistics_truncate_length: Option<usize>, default = Some(64)
625+
pub statistics_truncate_length: Option<usize>, default = None
619626

620627
/// (writing) Sets best effort maximum number of rows in data page
621628
pub data_page_row_count_limit: usize, default = 20_000
@@ -2134,6 +2141,13 @@ config_namespace_with_hashmap! {
21342141
/// Sets bloom filter number of distinct values. If NULL, uses
21352142
/// default parquet options
21362143
pub bloom_filter_ndv: Option<u64>, default = None
2144+
2145+
/// Sets max statistics size for the column path. If NULL, uses
2146+
/// default parquet options
2147+
/// max_statistics_size is deprecated, currently it is not being used
2148+
// TODO: remove once deprecated
2149+
#[deprecated(since = "45.0.0", note = "Setting does not do anything")]
2150+
pub max_statistics_size: Option<usize>, default = None
21372151
}
21382152
}
21392153

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ use parquet::{
3535
metadata::KeyValue,
3636
properties::{
3737
EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion,
38-
DEFAULT_STATISTICS_ENABLED,
38+
DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED,
3939
},
4040
},
4141
schema::types::ColumnPath,
@@ -160,6 +160,16 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
160160
builder =
161161
builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv);
162162
}
163+
164+
// max_statistics_size is deprecated, currently it is not being used
165+
// TODO: remove once deprecated
166+
#[allow(deprecated)]
167+
if let Some(max_statistics_size) = options.max_statistics_size {
168+
builder = {
169+
#[allow(deprecated)]
170+
builder.set_column_max_statistics_size(path, max_statistics_size)
171+
}
172+
}
163173
}
164174

165175
Ok(builder)
@@ -208,6 +218,7 @@ impl ParquetOptions {
208218
dictionary_enabled,
209219
dictionary_page_size_limit,
210220
statistics_enabled,
221+
max_statistics_size,
211222
max_row_group_size,
212223
created_by,
213224
column_index_truncate_length,
@@ -253,6 +264,13 @@ impl ParquetOptions {
253264
.set_data_page_row_count_limit(*data_page_row_count_limit)
254265
.set_bloom_filter_enabled(*bloom_filter_on_write);
255266

267+
builder = {
268+
#[allow(deprecated)]
269+
builder.set_max_statistics_size(
270+
max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
271+
)
272+
};
273+
256274
if let Some(bloom_filter_fpp) = bloom_filter_fpp {
257275
builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp);
258276
};
@@ -445,10 +463,12 @@ mod tests {
445463
fn column_options_with_non_defaults(
446464
src_col_defaults: &ParquetOptions,
447465
) -> ParquetColumnOptions {
466+
#[allow(deprecated)] // max_statistics_size
448467
ParquetColumnOptions {
449468
compression: Some("zstd(22)".into()),
450469
dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v),
451470
statistics_enabled: Some("none".into()),
471+
max_statistics_size: Some(72),
452472
encoding: Some("RLE".into()),
453473
bloom_filter_enabled: Some(true),
454474
bloom_filter_fpp: Some(0.72),
@@ -473,6 +493,7 @@ mod tests {
473493
dictionary_enabled: Some(!defaults.dictionary_enabled.unwrap_or(false)),
474494
dictionary_page_size_limit: 42,
475495
statistics_enabled: Some("chunk".into()),
496+
max_statistics_size: Some(42),
476497
max_row_group_size: 42,
477498
created_by: "wordy".into(),
478499
column_index_truncate_length: Some(42),
@@ -530,6 +551,7 @@ mod tests {
530551
),
531552
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
532553
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
554+
max_statistics_size: Some(props.max_statistics_size(&col)),
533555
}
534556
}
535557

@@ -586,6 +608,7 @@ mod tests {
586608
compression: default_col_props.compression,
587609
dictionary_enabled: default_col_props.dictionary_enabled,
588610
statistics_enabled: default_col_props.statistics_enabled,
611+
max_statistics_size: default_col_props.max_statistics_size,
589612
bloom_filter_on_write: default_col_props
590613
.bloom_filter_enabled
591614
.unwrap_or_default(),

datafusion/common/src/scalar/mod.rs

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -904,10 +904,11 @@ pub fn dict_from_values<K: ArrowDictionaryKeyType>(
904904
.map(|index| {
905905
if values_array.is_valid(index) {
906906
let native_index = K::Native::from_usize(index).ok_or_else(|| {
907-
_internal_datafusion_err!(
908-
"Can not create index of type {} from value {index}",
909-
K::DATA_TYPE
910-
)
907+
DataFusionError::Internal(format!(
908+
"Can not create index of type {} from value {}",
909+
K::DATA_TYPE,
910+
index
911+
))
911912
})?;
912913
Ok(Some(native_index))
913914
} else {
@@ -2202,16 +2203,6 @@ impl ScalarValue {
22022203
}
22032204

22042205
let array: ArrayRef = match &data_type {
2205-
DataType::Decimal32(_precision, _scale) => {
2206-
return _not_impl_err!(
2207-
"Decimal32 not supported in ScalarValue::iter_to_array"
2208-
);
2209-
}
2210-
DataType::Decimal64(_precision, _scale) => {
2211-
return _not_impl_err!(
2212-
"Decimal64 not supported in ScalarValue::iter_to_array"
2213-
);
2214-
}
22152206
DataType::Decimal128(precision, scale) => {
22162207
let decimal_array =
22172208
ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;

datafusion/common/src/types/native.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,10 +407,7 @@ impl From<DataType> for NativeType {
407407
DataType::Union(union_fields, _) => {
408408
Union(LogicalUnionFields::from(&union_fields))
409409
}
410-
DataType::Decimal32(p, s)
411-
| DataType::Decimal64(p, s)
412-
| DataType::Decimal128(p, s)
413-
| DataType::Decimal256(p, s) => Decimal(p, s),
410+
DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => Decimal(p, s),
414411
DataType::Map(field, _) => Map(Arc::new(field.as_ref().into())),
415412
DataType::Dictionary(_, data_type) => data_type.as_ref().clone().into(),
416413
DataType::RunEndEncoded(_, field) => field.data_type().clone().into(),

datafusion/core/tests/fuzz_cases/pruning.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,14 @@ async fn write_parquet_file(
319319
row_groups: Vec<Vec<String>>,
320320
) -> Bytes {
321321
let mut buf = BytesMut::new().writer();
322-
let props = WriterProperties::builder()
323-
.set_statistics_enabled(EnabledStatistics::Chunk) // row group level
324-
.set_statistics_truncate_length(truncation_length);
322+
let mut props = WriterProperties::builder();
323+
if let Some(truncation_length) = truncation_length {
324+
props = {
325+
#[allow(deprecated)]
326+
props.set_max_statistics_size(truncation_length)
327+
}
328+
}
329+
props = props.set_statistics_enabled(EnabledStatistics::Chunk); // row group level
325330
let props = props.build();
326331
{
327332
let mut writer =

datafusion/core/tests/parquet/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,11 @@ struct ContextWithParquet {
110110

111111
/// The output of running one of the test cases
112112
struct TestOutput {
113-
/// The input query SQL
113+
/// The input string
114114
sql: String,
115115
/// Execution metrics for the Parquet Scan
116116
parquet_metrics: MetricsSet,
117-
/// number of actual rows in results
117+
/// number of rows in results
118118
result_rows: usize,
119119
/// the contents of the input, as a string
120120
pretty_input: String,

0 commit comments

Comments (0)