Skip to content

Commit fa1f8c1

Browse files
authored
Upgrade arrow/parquet to 56.0.0 (#16690)
1 parent 2968331 commit fa1f8c1

File tree

29 files changed

+903
-394
lines changed

29 files changed

+903
-394
lines changed

Cargo.lock

Lines changed: 62 additions & 104 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -89,19 +89,19 @@ ahash = { version = "0.8", default-features = false, features = [
8989
"runtime-rng",
9090
] }
9191
apache-avro = { version = "0.17", default-features = false }
92-
arrow = { version = "55.2.0", features = [
92+
arrow = { version = "56.0.0", features = [
9393
"prettyprint",
9494
"chrono-tz",
9595
] }
96-
arrow-buffer = { version = "55.2.0", default-features = false }
97-
arrow-flight = { version = "55.2.0", features = [
96+
arrow-buffer = { version = "56.0.0", default-features = false }
97+
arrow-flight = { version = "56.0.0", features = [
9898
"flight-sql-experimental",
9999
] }
100-
arrow-ipc = { version = "55.2.0", default-features = false, features = [
100+
arrow-ipc = { version = "56.0.0", default-features = false, features = [
101101
"lz4",
102102
] }
103-
arrow-ord = { version = "55.2.0", default-features = false }
104-
arrow-schema = { version = "55.2.0", default-features = false }
103+
arrow-ord = { version = "56.0.0", default-features = false }
104+
arrow-schema = { version = "56.0.0", default-features = false }
105105
async-trait = "0.1.88"
106106
bigdecimal = "0.4.8"
107107
bytes = "1.10"
@@ -155,7 +155,7 @@ itertools = "0.14"
155155
log = "^0.4"
156156
object_store = { version = "0.12.3", default-features = false }
157157
parking_lot = "0.12"
158-
parquet = { version = "55.2.0", default-features = false, features = [
158+
parquet = { version = "56.0.0", default-features = false, features = [
159159
"arrow",
160160
"async",
161161
"object_store",

datafusion-examples/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ serde_json = { workspace = true }
7878
tempfile = { workspace = true }
7979
test-utils = { path = "../test-utils" }
8080
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
81-
tonic = "0.12.1"
81+
tonic = "0.13.1"
8282
tracing = { version = "0.1" }
8383
tracing-subscriber = { version = "0.3" }
8484
url = { workspace = true }

datafusion/common/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ log = { workspace = true }
7171
object_store = { workspace = true, optional = true }
7272
parquet = { workspace = true, optional = true, default-features = true }
7373
paste = "1.0.15"
74-
pyo3 = { version = "0.24.2", optional = true }
74+
pyo3 = { version = "0.25", optional = true }
7575
recursive = { workspace = true, optional = true }
7676
sqlparser = { workspace = true }
7777
tokio = { workspace = true }

datafusion/common/src/config.rs

Lines changed: 2 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -607,13 +607,6 @@ config_namespace! {
607607
/// default parquet writer setting
608608
pub statistics_enabled: Option<String>, transform = str::to_lowercase, default = Some("page".into())
609609

610-
/// (writing) Sets max statistics size for any column. If NULL, uses
611-
/// default parquet writer setting
612-
/// max_statistics_size is deprecated, currently it is not being used
613-
// TODO: remove once deprecated
614-
#[deprecated(since = "45.0.0", note = "Setting does not do anything")]
615-
pub max_statistics_size: Option<usize>, default = Some(4096)
616-
617610
/// (writing) Target maximum number of rows in each row group (defaults to 1M
618611
/// rows). Writing larger row groups requires more memory to write, but
619612
/// can get better compression and be faster to read.
@@ -625,9 +618,9 @@ config_namespace! {
625618
/// (writing) Sets column index truncate length
626619
pub column_index_truncate_length: Option<usize>, default = Some(64)
627620

628-
/// (writing) Sets statictics truncate length. If NULL, uses
621+
/// (writing) Sets statistics truncate length. If NULL, uses
629622
/// default parquet writer setting
630-
pub statistics_truncate_length: Option<usize>, default = None
623+
pub statistics_truncate_length: Option<usize>, default = Some(64)
631624

632625
/// (writing) Sets best effort maximum number of rows in data page
633626
pub data_page_row_count_limit: usize, default = 20_000
@@ -2064,13 +2057,6 @@ config_namespace_with_hashmap! {
20642057
/// Sets bloom filter number of distinct values. If NULL, uses
20652058
/// default parquet options
20662059
pub bloom_filter_ndv: Option<u64>, default = None
2067-
2068-
/// Sets max statistics size for the column path. If NULL, uses
2069-
/// default parquet options
2070-
/// max_statistics_size is deprecated, currently it is not being used
2071-
// TODO: remove once deprecated
2072-
#[deprecated(since = "45.0.0", note = "Setting does not do anything")]
2073-
pub max_statistics_size: Option<usize>, default = None
20742060
}
20752061
}
20762062

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 1 addition & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ use parquet::{
3636
metadata::KeyValue,
3737
properties::{
3838
EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion,
39-
DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED,
39+
DEFAULT_STATISTICS_ENABLED,
4040
},
4141
},
4242
schema::types::ColumnPath,
@@ -161,16 +161,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder {
161161
builder =
162162
builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv);
163163
}
164-
165-
// max_statistics_size is deprecated, currently it is not being used
166-
// TODO: remove once deprecated
167-
#[allow(deprecated)]
168-
if let Some(max_statistics_size) = options.max_statistics_size {
169-
builder = {
170-
#[allow(deprecated)]
171-
builder.set_column_max_statistics_size(path, max_statistics_size)
172-
}
173-
}
174164
}
175165

176166
Ok(builder)
@@ -219,7 +209,6 @@ impl ParquetOptions {
219209
dictionary_enabled,
220210
dictionary_page_size_limit,
221211
statistics_enabled,
222-
max_statistics_size,
223212
max_row_group_size,
224213
created_by,
225214
column_index_truncate_length,
@@ -266,13 +255,6 @@ impl ParquetOptions {
266255
.set_data_page_row_count_limit(*data_page_row_count_limit)
267256
.set_bloom_filter_enabled(*bloom_filter_on_write);
268257

269-
builder = {
270-
#[allow(deprecated)]
271-
builder.set_max_statistics_size(
272-
max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE),
273-
)
274-
};
275-
276258
if let Some(bloom_filter_fpp) = bloom_filter_fpp {
277259
builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp);
278260
};
@@ -465,12 +447,10 @@ mod tests {
465447
fn column_options_with_non_defaults(
466448
src_col_defaults: &ParquetOptions,
467449
) -> ParquetColumnOptions {
468-
#[allow(deprecated)] // max_statistics_size
469450
ParquetColumnOptions {
470451
compression: Some("zstd(22)".into()),
471452
dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v),
472453
statistics_enabled: Some("none".into()),
473-
max_statistics_size: Some(72),
474454
encoding: Some("RLE".into()),
475455
bloom_filter_enabled: Some(true),
476456
bloom_filter_fpp: Some(0.72),
@@ -495,7 +475,6 @@ mod tests {
495475
dictionary_enabled: Some(!defaults.dictionary_enabled.unwrap_or(false)),
496476
dictionary_page_size_limit: 42,
497477
statistics_enabled: Some("chunk".into()),
498-
max_statistics_size: Some(42),
499478
max_row_group_size: 42,
500479
created_by: "wordy".into(),
501480
column_index_truncate_length: Some(42),
@@ -554,7 +533,6 @@ mod tests {
554533
),
555534
bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp),
556535
bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv),
557-
max_statistics_size: Some(props.max_statistics_size(&col)),
558536
}
559537
}
560538

@@ -611,7 +589,6 @@ mod tests {
611589
compression: default_col_props.compression,
612590
dictionary_enabled: default_col_props.dictionary_enabled,
613591
statistics_enabled: default_col_props.statistics_enabled,
614-
max_statistics_size: default_col_props.max_statistics_size,
615592
bloom_filter_on_write: default_col_props
616593
.bloom_filter_enabled
617594
.unwrap_or_default(),

datafusion/common/src/scalar/mod.rs

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -891,11 +891,10 @@ fn dict_from_values<K: ArrowDictionaryKeyType>(
891891
.map(|index| {
892892
if values_array.is_valid(index) {
893893
let native_index = K::Native::from_usize(index).ok_or_else(|| {
894-
DataFusionError::Internal(format!(
895-
"Can not create index of type {} from value {}",
896-
K::DATA_TYPE,
897-
index
898-
))
894+
_internal_datafusion_err!(
895+
"Can not create index of type {} from value {index}",
896+
K::DATA_TYPE
897+
)
899898
})?;
900899
Ok(Some(native_index))
901900
} else {
@@ -2192,6 +2191,16 @@ impl ScalarValue {
21922191
}
21932192

21942193
let array: ArrayRef = match &data_type {
2194+
DataType::Decimal32(_precision, _scale) => {
2195+
return _not_impl_err!(
2196+
"Decimal32 not supported in ScalarValue::iter_to_array"
2197+
);
2198+
}
2199+
DataType::Decimal64(_precision, _scale) => {
2200+
return _not_impl_err!(
2201+
"Decimal64 not supported in ScalarValue::iter_to_array"
2202+
);
2203+
}
21952204
DataType::Decimal128(precision, scale) => {
21962205
let decimal_array =
21972206
ScalarValue::iter_to_decimal_array(scalars, *precision, *scale)?;

datafusion/common/src/types/native.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -407,7 +407,10 @@ impl From<DataType> for NativeType {
407407
DataType::Union(union_fields, _) => {
408408
Union(LogicalUnionFields::from(&union_fields))
409409
}
410-
DataType::Decimal128(p, s) | DataType::Decimal256(p, s) => Decimal(p, s),
410+
DataType::Decimal32(p, s)
411+
| DataType::Decimal64(p, s)
412+
| DataType::Decimal128(p, s)
413+
| DataType::Decimal256(p, s) => Decimal(p, s),
411414
DataType::Map(field, _) => Map(Arc::new(field.as_ref().into())),
412415
DataType::Dictionary(_, data_type) => data_type.as_ref().clone().into(),
413416
DataType::RunEndEncoded(_, field) => field.data_type().clone().into(),

datafusion/core/tests/fuzz_cases/pruning.rs

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -319,14 +319,9 @@ async fn write_parquet_file(
319319
row_groups: Vec<Vec<String>>,
320320
) -> Bytes {
321321
let mut buf = BytesMut::new().writer();
322-
let mut props = WriterProperties::builder();
323-
if let Some(truncation_length) = truncation_length {
324-
props = {
325-
#[allow(deprecated)]
326-
props.set_max_statistics_size(truncation_length)
327-
}
328-
}
329-
props = props.set_statistics_enabled(EnabledStatistics::Chunk); // row group level
322+
let props = WriterProperties::builder()
323+
.set_statistics_enabled(EnabledStatistics::Chunk) // row group level
324+
.set_statistics_truncate_length(truncation_length);
330325
let props = props.build();
331326
{
332327
let mut writer =

datafusion/core/tests/parquet/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -109,11 +109,11 @@ struct ContextWithParquet {
109109

110110
/// The output of running one of the test cases
111111
struct TestOutput {
112-
/// The input string
112+
/// The input query SQL
113113
sql: String,
114114
/// Execution metrics for the Parquet Scan
115115
parquet_metrics: MetricsSet,
116-
/// number of rows in results
116+
/// number of actual rows in results
117117
result_rows: usize,
118118
/// the contents of the input, as a string
119119
pretty_input: String,

0 commit comments

Comments
 (0)