Skip to content

Commit b6c3582

Browse files
committed
chore: use enum as date_trunc granularity
1 parent f57da83 commit b6c3582

File tree

1 file changed

+142
-74
lines changed

1 file changed

+142
-74
lines changed

datafusion/functions/src/datetime/date_trunc.rs

Lines changed: 142 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,71 @@ use chrono::{
4747
DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
4848
};
4949

50+
/// Represents the granularity for date truncation operations
51+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52+
enum DateTruncGranularity {
53+
Microsecond,
54+
Millisecond,
55+
Second,
56+
Minute,
57+
Hour,
58+
Day,
59+
Week,
60+
Month,
61+
Quarter,
62+
Year,
63+
}
64+
65+
impl DateTruncGranularity {
66+
/// Mapping of string representations to enum variants
67+
const GRANULARITY_MAP: &[(&str, Self)] = &[
68+
("microsecond", Self::Microsecond),
69+
("millisecond", Self::Millisecond),
70+
("second", Self::Second),
71+
("minute", Self::Minute),
72+
("hour", Self::Hour),
73+
("day", Self::Day),
74+
("week", Self::Week),
75+
("month", Self::Month),
76+
("quarter", Self::Quarter),
77+
("year", Self::Year),
78+
];
79+
80+
/// Parse a granularity string into a DateTruncGranularity enum
81+
fn from_str(s: &str) -> Result<Self> {
82+
let s_lower = s.to_lowercase();
83+
Self::GRANULARITY_MAP
84+
.iter()
85+
.find(|(key, _)| *key == s_lower.as_str())
86+
.map(|(_, value)| *value)
87+
.ok_or_else(|| {
88+
let supported = Self::GRANULARITY_MAP
89+
.iter()
90+
.map(|(key, _)| *key)
91+
.collect::<Vec<_>>()
92+
.join(", ");
93+
exec_datafusion_err!(
94+
"Unsupported date_trunc granularity: {s}. Supported values are: {supported}"
95+
)
96+
})
97+
}
98+
99+
/// Returns true if this granularity can be handled with simple arithmetic
100+
/// (fine granularity: second, minute, millisecond, microsecond)
101+
fn is_fine_granularity(&self) -> bool {
102+
matches!(
103+
self,
104+
Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
105+
)
106+
}
107+
108+
/// Returns true if this granularity can be handled with simple arithmetic in UTC
109+
/// (hour and day in addition to fine granularities)
110+
fn is_fine_granularity_utc(&self) -> bool {
111+
self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
112+
}
113+
}
114+
50115
#[user_doc(
51116
doc_section(label = "Time and Date Functions"),
52117
description = "Truncates a timestamp value to a specified precision.",
@@ -172,7 +237,7 @@ impl ScalarUDFImpl for DateTruncFunc {
172237
let args = args.args;
173238
let (granularity, array) = (&args[0], &args[1]);
174239

175-
let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
240+
let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
176241
granularity
177242
{
178243
v.to_lowercase()
@@ -183,54 +248,46 @@ impl ScalarUDFImpl for DateTruncFunc {
183248
return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
184249
};
185250

251+
let granularity = DateTruncGranularity::from_str(&granularity_str)?;
252+
186253
fn process_array<T: ArrowTimestampType>(
187254
array: &dyn Array,
188-
granularity: String,
255+
granularity: DateTruncGranularity,
189256
tz_opt: &Option<Arc<str>>,
190257
) -> Result<ColumnarValue> {
191258
let parsed_tz = parse_tz(tz_opt)?;
192259
let array = as_primitive_array::<T>(array)?;
193260

194-
// fast path for fine granularities
195-
if matches!(
196-
granularity.as_str(),
197-
// For modern timezones, it's correct to truncate "minute" in this way.
198-
// Both datafusion and arrow are ignoring historical timezone's non-minute granularity
199-
// bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
200-
"second" | "minute" | "millisecond" | "microsecond"
201-
) ||
261+
// fast path for fine granularity
262+
// For modern timezones, it's correct to truncate "minute" in this way.
263+
// Both datafusion and arrow are ignoring historical timezone's non-minute granularity
264+
// bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
202265
// In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
203-
(parsed_tz.is_none() && matches!(granularity.as_str(), "hour" | "day"))
266+
if granularity.is_fine_granularity()
267+
|| (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
204268
{
205269
let result = general_date_trunc_array_fine_granularity(
206270
T::UNIT,
207271
array,
208-
granularity.as_str(),
272+
granularity,
209273
)?;
210274
return Ok(ColumnarValue::Array(result));
211275
}
212276

213277
let array: PrimitiveArray<T> = array
214-
.try_unary(|x| {
215-
general_date_trunc(T::UNIT, x, parsed_tz, granularity.as_str())
216-
})?
278+
.try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
217279
.with_timezone_opt(tz_opt.clone());
218280
Ok(ColumnarValue::Array(Arc::new(array)))
219281
}
220282

221283
fn process_scalar<T: ArrowTimestampType>(
222284
v: &Option<i64>,
223-
granularity: String,
285+
granularity: DateTruncGranularity,
224286
tz_opt: &Option<Arc<str>>,
225287
) -> Result<ColumnarValue> {
226288
let parsed_tz = parse_tz(tz_opt)?;
227289
let value = if let Some(v) = v {
228-
Some(general_date_trunc(
229-
T::UNIT,
230-
*v,
231-
parsed_tz,
232-
granularity.as_str(),
233-
)?)
290+
Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
234291
} else {
235292
None
236293
};
@@ -308,57 +365,57 @@ impl ScalarUDFImpl for DateTruncFunc {
308365
}
309366
}
310367

311-
fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> Result<Option<T>>
368+
fn _date_trunc_coarse<T>(
369+
granularity: DateTruncGranularity,
370+
value: Option<T>,
371+
) -> Result<Option<T>>
312372
where
313373
T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
314374
{
315375
let value = match granularity {
316-
"millisecond" => value,
317-
"microsecond" => value,
318-
"second" => value.and_then(|d| d.with_nanosecond(0)),
319-
"minute" => value
376+
DateTruncGranularity::Millisecond => value,
377+
DateTruncGranularity::Microsecond => value,
378+
DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
379+
DateTruncGranularity::Minute => value
320380
.and_then(|d| d.with_nanosecond(0))
321381
.and_then(|d| d.with_second(0)),
322-
"hour" => value
382+
DateTruncGranularity::Hour => value
323383
.and_then(|d| d.with_nanosecond(0))
324384
.and_then(|d| d.with_second(0))
325385
.and_then(|d| d.with_minute(0)),
326-
"day" => value
386+
DateTruncGranularity::Day => value
327387
.and_then(|d| d.with_nanosecond(0))
328388
.and_then(|d| d.with_second(0))
329389
.and_then(|d| d.with_minute(0))
330390
.and_then(|d| d.with_hour(0)),
331-
"week" => value
391+
DateTruncGranularity::Week => value
332392
.and_then(|d| d.with_nanosecond(0))
333393
.and_then(|d| d.with_second(0))
334394
.and_then(|d| d.with_minute(0))
335395
.and_then(|d| d.with_hour(0))
336396
.map(|d| {
337397
d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
338398
}),
339-
"month" => value
399+
DateTruncGranularity::Month => value
340400
.and_then(|d| d.with_nanosecond(0))
341401
.and_then(|d| d.with_second(0))
342402
.and_then(|d| d.with_minute(0))
343403
.and_then(|d| d.with_hour(0))
344404
.and_then(|d| d.with_day0(0)),
345-
"quarter" => value
405+
DateTruncGranularity::Quarter => value
346406
.and_then(|d| d.with_nanosecond(0))
347407
.and_then(|d| d.with_second(0))
348408
.and_then(|d| d.with_minute(0))
349409
.and_then(|d| d.with_hour(0))
350410
.and_then(|d| d.with_day0(0))
351411
.and_then(|d| d.with_month(quarter_month(&d))),
352-
"year" => value
412+
DateTruncGranularity::Year => value
353413
.and_then(|d| d.with_nanosecond(0))
354414
.and_then(|d| d.with_second(0))
355415
.and_then(|d| d.with_minute(0))
356416
.and_then(|d| d.with_hour(0))
357417
.and_then(|d| d.with_day0(0))
358418
.and_then(|d| d.with_month0(0)),
359-
unsupported => {
360-
return exec_err!("Unsupported date_trunc granularity: {unsupported}");
361-
}
362419
};
363420
Ok(value)
364421
}
@@ -371,7 +428,7 @@ where
371428
}
372429

373430
fn _date_trunc_coarse_with_tz(
374-
granularity: &str,
431+
granularity: DateTruncGranularity,
375432
value: Option<DateTime<Tz>>,
376433
) -> Result<Option<i64>> {
377434
if let Some(value) = value {
@@ -413,7 +470,7 @@ fn _date_trunc_coarse_with_tz(
413470
}
414471

415472
fn _date_trunc_coarse_without_tz(
416-
granularity: &str,
473+
granularity: DateTruncGranularity,
417474
value: Option<NaiveDateTime>,
418475
) -> Result<Option<i64>> {
419476
let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
@@ -424,7 +481,11 @@ fn _date_trunc_coarse_without_tz(
424481
/// epoch, for granularities greater than 1 second, in taking into
425482
/// account that some granularities are not uniform durations of time
426483
/// (e.g. months are not always the same lengths, leap seconds, etc)
427-
fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i64> {
484+
fn date_trunc_coarse(
485+
granularity: DateTruncGranularity,
486+
value: i64,
487+
tz: Option<Tz>,
488+
) -> Result<i64> {
428489
let value = match tz {
429490
Some(tz) => {
430491
// Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
@@ -454,30 +515,30 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
454515
fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
455516
tu: TimeUnit,
456517
array: &PrimitiveArray<T>,
457-
granularity: &str,
518+
granularity: DateTruncGranularity,
458519
) -> Result<ArrayRef> {
459520
let unit = match (tu, granularity) {
460-
(Second, "minute") => NonZeroI64::new(60),
461-
(Second, "hour") => NonZeroI64::new(3600),
462-
(Second, "day") => NonZeroI64::new(86400),
463-
464-
(Millisecond, "second") => NonZeroI64::new(1_000),
465-
(Millisecond, "minute") => NonZeroI64::new(60_000),
466-
(Millisecond, "hour") => NonZeroI64::new(3_600_000),
467-
(Millisecond, "day") => NonZeroI64::new(86_400_000),
468-
469-
(Microsecond, "millisecond") => NonZeroI64::new(1_000),
470-
(Microsecond, "second") => NonZeroI64::new(1_000_000),
471-
(Microsecond, "minute") => NonZeroI64::new(60_000_000),
472-
(Microsecond, "hour") => NonZeroI64::new(3_600_000_000),
473-
(Microsecond, "day") => NonZeroI64::new(86_400_000_000),
474-
475-
(Nanosecond, "microsecond") => NonZeroI64::new(1_000),
476-
(Nanosecond, "millisecond") => NonZeroI64::new(1_000_000),
477-
(Nanosecond, "second") => NonZeroI64::new(1_000_000_000),
478-
(Nanosecond, "minute") => NonZeroI64::new(60_000_000_000),
479-
(Nanosecond, "hour") => NonZeroI64::new(3_600_000_000_000),
480-
(Nanosecond, "day") => NonZeroI64::new(86_400_000_000_000),
521+
(Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
522+
(Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
523+
(Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
524+
525+
(Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
526+
(Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
527+
(Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
528+
(Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
529+
530+
(Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
531+
(Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
532+
(Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
533+
(Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
534+
(Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
535+
536+
(Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
537+
(Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
538+
(Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
539+
(Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
540+
(Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
541+
(Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
481542
_ => None,
482543
};
483544

@@ -502,7 +563,7 @@ fn general_date_trunc(
502563
tu: TimeUnit,
503564
value: i64,
504565
tz: Option<Tz>,
505-
granularity: &str,
566+
granularity: DateTruncGranularity,
506567
) -> Result<i64, DataFusionError> {
507568
let scale = match tu {
508569
Second => 1_000_000_000,
@@ -516,25 +577,29 @@ fn general_date_trunc(
516577

517578
let result = match tu {
518579
Second => match granularity {
519-
"minute" => nano / 1_000_000_000 / 60 * 60,
580+
DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
520581
_ => nano / 1_000_000_000,
521582
},
522583
Millisecond => match granularity {
523-
"minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
524-
"second" => nano / 1_000_000 / 1_000 * 1_000,
584+
DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
585+
DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
525586
_ => nano / 1_000_000,
526587
},
527588
Microsecond => match granularity {
528-
"minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000,
529-
"second" => nano / 1_000 / 1_000_000 * 1_000_000,
530-
"millisecond" => nano / 1_000 / 1_000 * 1_000,
589+
DateTruncGranularity::Minute => {
590+
nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
591+
}
592+
DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
593+
DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
531594
_ => nano / 1_000,
532595
},
533596
_ => match granularity {
534-
"minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60,
535-
"second" => nano / 1_000_000_000 * 1_000_000_000,
536-
"millisecond" => nano / 1_000_000 * 1_000_000,
537-
"microsecond" => nano / 1_000 * 1_000,
597+
DateTruncGranularity::Minute => {
598+
nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
599+
}
600+
DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
601+
DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
602+
DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
538603
_ => nano,
539604
},
540605
};
@@ -554,7 +619,9 @@ fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
554619
mod tests {
555620
use std::sync::Arc;
556621

557-
use crate::datetime::date_trunc::{date_trunc_coarse, DateTruncFunc};
622+
use crate::datetime::date_trunc::{
623+
date_trunc_coarse, DateTruncFunc, DateTruncGranularity,
624+
};
558625

559626
use arrow::array::cast::as_primitive_array;
560627
use arrow::array::types::TimestampNanosecondType;
@@ -655,7 +722,8 @@ mod tests {
655722
cases.iter().for_each(|(original, granularity, expected)| {
656723
let left = string_to_timestamp_nanos(original).unwrap();
657724
let right = string_to_timestamp_nanos(expected).unwrap();
658-
let result = date_trunc_coarse(granularity, left, None).unwrap();
725+
let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
726+
let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
659727
assert_eq!(result, right, "{original} = {expected}");
660728
});
661729
}

0 commit comments

Comments
 (0)