@@ -47,6 +47,71 @@ use chrono::{
4747 DateTime , Datelike , Duration , LocalResult , NaiveDateTime , Offset , TimeDelta , Timelike ,
4848} ;
4949
50+ /// Represents the granularity for date truncation operations
51+ #[ derive( Debug , Clone , Copy , PartialEq , Eq ) ]
52+ enum DateTruncGranularity {
53+ Microsecond ,
54+ Millisecond ,
55+ Second ,
56+ Minute ,
57+ Hour ,
58+ Day ,
59+ Week ,
60+ Month ,
61+ Quarter ,
62+ Year ,
63+ }
64+
65+ impl DateTruncGranularity {
66+ /// Mapping of string representations to enum variants
67+ const GRANULARITY_MAP : & [ ( & str , Self ) ] = & [
68+ ( "microsecond" , Self :: Microsecond ) ,
69+ ( "millisecond" , Self :: Millisecond ) ,
70+ ( "second" , Self :: Second ) ,
71+ ( "minute" , Self :: Minute ) ,
72+ ( "hour" , Self :: Hour ) ,
73+ ( "day" , Self :: Day ) ,
74+ ( "week" , Self :: Week ) ,
75+ ( "month" , Self :: Month ) ,
76+ ( "quarter" , Self :: Quarter ) ,
77+ ( "year" , Self :: Year ) ,
78+ ] ;
79+
80+ /// Parse a granularity string into a DateTruncGranularity enum
81+ fn from_str ( s : & str ) -> Result < Self > {
82+ let s_lower = s. to_lowercase ( ) ;
83+ Self :: GRANULARITY_MAP
84+ . iter ( )
85+ . find ( |( key, _) | * key == s_lower. as_str ( ) )
86+ . map ( |( _, value) | * value)
87+ . ok_or_else ( || {
88+ let supported = Self :: GRANULARITY_MAP
89+ . iter ( )
90+ . map ( |( key, _) | * key)
91+ . collect :: < Vec < _ > > ( )
92+ . join ( ", " ) ;
93+ exec_datafusion_err ! (
94+ "Unsupported date_trunc granularity: {s}. Supported values are: {supported}"
95+ )
96+ } )
97+ }
98+
99+ /// Returns true if this granularity can be handled with simple arithmetic
100+ /// (fine granularity: second, minute, millisecond, microsecond)
101+ fn is_fine_granularity ( & self ) -> bool {
102+ matches ! (
103+ self ,
104+ Self :: Second | Self :: Minute | Self :: Millisecond | Self :: Microsecond
105+ )
106+ }
107+
108+ /// Returns true if this granularity can be handled with simple arithmetic in UTC
109+ /// (hour and day in addition to fine granularities)
110+ fn is_fine_granularity_utc ( & self ) -> bool {
111+ self . is_fine_granularity ( ) || matches ! ( self , Self :: Hour | Self :: Day )
112+ }
113+ }
114+
50115#[ user_doc(
51116 doc_section( label = "Time and Date Functions" ) ,
52117 description = "Truncates a timestamp value to a specified precision." ,
@@ -172,7 +237,7 @@ impl ScalarUDFImpl for DateTruncFunc {
172237 let args = args. args ;
173238 let ( granularity, array) = ( & args[ 0 ] , & args[ 1 ] ) ;
174239
175- let granularity = if let ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( v) ) ) =
240+ let granularity_str = if let ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( v) ) ) =
176241 granularity
177242 {
178243 v. to_lowercase ( )
@@ -183,54 +248,46 @@ impl ScalarUDFImpl for DateTruncFunc {
183248 return exec_err ! ( "Granularity of `date_trunc` must be non-null scalar Utf8" ) ;
184249 } ;
185250
251+ let granularity = DateTruncGranularity :: from_str ( & granularity_str) ?;
252+
186253 fn process_array < T : ArrowTimestampType > (
187254 array : & dyn Array ,
188- granularity : String ,
255+ granularity : DateTruncGranularity ,
189256 tz_opt : & Option < Arc < str > > ,
190257 ) -> Result < ColumnarValue > {
191258 let parsed_tz = parse_tz ( tz_opt) ?;
192259 let array = as_primitive_array :: < T > ( array) ?;
193260
194- // fast path for fine granularities
195- if matches ! (
196- granularity. as_str( ) ,
197- // For modern timezones, it's correct to truncate "minute" in this way.
198- // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
199- // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
200- "second" | "minute" | "millisecond" | "microsecond"
201- ) ||
261+ // fast path for fine granularity
262+ // For modern timezones, it's correct to truncate "minute" in this way.
263+ // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
264+ // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
202265 // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
203- ( parsed_tz. is_none ( ) && matches ! ( granularity. as_str( ) , "hour" | "day" ) )
266+ if granularity. is_fine_granularity ( )
267+ || ( parsed_tz. is_none ( ) && granularity. is_fine_granularity_utc ( ) )
204268 {
205269 let result = general_date_trunc_array_fine_granularity (
206270 T :: UNIT ,
207271 array,
208- granularity. as_str ( ) ,
272+ granularity,
209273 ) ?;
210274 return Ok ( ColumnarValue :: Array ( result) ) ;
211275 }
212276
213277 let array: PrimitiveArray < T > = array
214- . try_unary ( |x| {
215- general_date_trunc ( T :: UNIT , x, parsed_tz, granularity. as_str ( ) )
216- } ) ?
278+ . try_unary ( |x| general_date_trunc ( T :: UNIT , x, parsed_tz, granularity) ) ?
217279 . with_timezone_opt ( tz_opt. clone ( ) ) ;
218280 Ok ( ColumnarValue :: Array ( Arc :: new ( array) ) )
219281 }
220282
221283 fn process_scalar < T : ArrowTimestampType > (
222284 v : & Option < i64 > ,
223- granularity : String ,
285+ granularity : DateTruncGranularity ,
224286 tz_opt : & Option < Arc < str > > ,
225287 ) -> Result < ColumnarValue > {
226288 let parsed_tz = parse_tz ( tz_opt) ?;
227289 let value = if let Some ( v) = v {
228- Some ( general_date_trunc (
229- T :: UNIT ,
230- * v,
231- parsed_tz,
232- granularity. as_str ( ) ,
233- ) ?)
290+ Some ( general_date_trunc ( T :: UNIT , * v, parsed_tz, granularity) ?)
234291 } else {
235292 None
236293 } ;
@@ -308,57 +365,57 @@ impl ScalarUDFImpl for DateTruncFunc {
308365 }
309366}
310367
311- fn _date_trunc_coarse < T > ( granularity : & str , value : Option < T > ) -> Result < Option < T > >
368+ fn _date_trunc_coarse < T > (
369+ granularity : DateTruncGranularity ,
370+ value : Option < T > ,
371+ ) -> Result < Option < T > >
312372where
313373 T : Datelike + Timelike + Sub < Duration , Output = T > + Copy ,
314374{
315375 let value = match granularity {
316- "millisecond" => value,
317- "microsecond" => value,
318- "second" => value. and_then ( |d| d. with_nanosecond ( 0 ) ) ,
319- "minute" => value
376+ DateTruncGranularity :: Millisecond => value,
377+ DateTruncGranularity :: Microsecond => value,
378+ DateTruncGranularity :: Second => value. and_then ( |d| d. with_nanosecond ( 0 ) ) ,
379+ DateTruncGranularity :: Minute => value
320380 . and_then ( |d| d. with_nanosecond ( 0 ) )
321381 . and_then ( |d| d. with_second ( 0 ) ) ,
322- "hour" => value
382+ DateTruncGranularity :: Hour => value
323383 . and_then ( |d| d. with_nanosecond ( 0 ) )
324384 . and_then ( |d| d. with_second ( 0 ) )
325385 . and_then ( |d| d. with_minute ( 0 ) ) ,
326- "day" => value
386+ DateTruncGranularity :: Day => value
327387 . and_then ( |d| d. with_nanosecond ( 0 ) )
328388 . and_then ( |d| d. with_second ( 0 ) )
329389 . and_then ( |d| d. with_minute ( 0 ) )
330390 . and_then ( |d| d. with_hour ( 0 ) ) ,
331- "week" => value
391+ DateTruncGranularity :: Week => value
332392 . and_then ( |d| d. with_nanosecond ( 0 ) )
333393 . and_then ( |d| d. with_second ( 0 ) )
334394 . and_then ( |d| d. with_minute ( 0 ) )
335395 . and_then ( |d| d. with_hour ( 0 ) )
336396 . map ( |d| {
337397 d - TimeDelta :: try_seconds ( 60 * 60 * 24 * d. weekday ( ) as i64 ) . unwrap ( )
338398 } ) ,
339- "month" => value
399+ DateTruncGranularity :: Month => value
340400 . and_then ( |d| d. with_nanosecond ( 0 ) )
341401 . and_then ( |d| d. with_second ( 0 ) )
342402 . and_then ( |d| d. with_minute ( 0 ) )
343403 . and_then ( |d| d. with_hour ( 0 ) )
344404 . and_then ( |d| d. with_day0 ( 0 ) ) ,
345- "quarter" => value
405+ DateTruncGranularity :: Quarter => value
346406 . and_then ( |d| d. with_nanosecond ( 0 ) )
347407 . and_then ( |d| d. with_second ( 0 ) )
348408 . and_then ( |d| d. with_minute ( 0 ) )
349409 . and_then ( |d| d. with_hour ( 0 ) )
350410 . and_then ( |d| d. with_day0 ( 0 ) )
351411 . and_then ( |d| d. with_month ( quarter_month ( & d) ) ) ,
352- "year" => value
412+ DateTruncGranularity :: Year => value
353413 . and_then ( |d| d. with_nanosecond ( 0 ) )
354414 . and_then ( |d| d. with_second ( 0 ) )
355415 . and_then ( |d| d. with_minute ( 0 ) )
356416 . and_then ( |d| d. with_hour ( 0 ) )
357417 . and_then ( |d| d. with_day0 ( 0 ) )
358418 . and_then ( |d| d. with_month0 ( 0 ) ) ,
359- unsupported => {
360- return exec_err ! ( "Unsupported date_trunc granularity: {unsupported}" ) ;
361- }
362419 } ;
363420 Ok ( value)
364421}
@@ -371,7 +428,7 @@ where
371428}
372429
373430fn _date_trunc_coarse_with_tz (
374- granularity : & str ,
431+ granularity : DateTruncGranularity ,
375432 value : Option < DateTime < Tz > > ,
376433) -> Result < Option < i64 > > {
377434 if let Some ( value) = value {
@@ -413,7 +470,7 @@ fn _date_trunc_coarse_with_tz(
413470}
414471
415472fn _date_trunc_coarse_without_tz (
416- granularity : & str ,
473+ granularity : DateTruncGranularity ,
417474 value : Option < NaiveDateTime > ,
418475) -> Result < Option < i64 > > {
419476 let value = _date_trunc_coarse :: < NaiveDateTime > ( granularity, value) ?;
@@ -424,7 +481,11 @@ fn _date_trunc_coarse_without_tz(
424481/// epoch, for granularities greater than 1 second, in taking into
425482/// account that some granularities are not uniform durations of time
426483/// (e.g. months are not always the same lengths, leap seconds, etc)
427- fn date_trunc_coarse ( granularity : & str , value : i64 , tz : Option < Tz > ) -> Result < i64 > {
484+ fn date_trunc_coarse (
485+ granularity : DateTruncGranularity ,
486+ value : i64 ,
487+ tz : Option < Tz > ,
488+ ) -> Result < i64 > {
428489 let value = match tz {
429490 Some ( tz) => {
430491 // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
@@ -454,30 +515,30 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
454515fn general_date_trunc_array_fine_granularity < T : ArrowTimestampType > (
455516 tu : TimeUnit ,
456517 array : & PrimitiveArray < T > ,
457- granularity : & str ,
518+ granularity : DateTruncGranularity ,
458519) -> Result < ArrayRef > {
459520 let unit = match ( tu, granularity) {
460- ( Second , "minute" ) => NonZeroI64 :: new ( 60 ) ,
461- ( Second , "hour" ) => NonZeroI64 :: new ( 3600 ) ,
462- ( Second , "day" ) => NonZeroI64 :: new ( 86400 ) ,
463-
464- ( Millisecond , "second" ) => NonZeroI64 :: new ( 1_000 ) ,
465- ( Millisecond , "minute" ) => NonZeroI64 :: new ( 60_000 ) ,
466- ( Millisecond , "hour" ) => NonZeroI64 :: new ( 3_600_000 ) ,
467- ( Millisecond , "day" ) => NonZeroI64 :: new ( 86_400_000 ) ,
468-
469- ( Microsecond , "millisecond" ) => NonZeroI64 :: new ( 1_000 ) ,
470- ( Microsecond , "second" ) => NonZeroI64 :: new ( 1_000_000 ) ,
471- ( Microsecond , "minute" ) => NonZeroI64 :: new ( 60_000_000 ) ,
472- ( Microsecond , "hour" ) => NonZeroI64 :: new ( 3_600_000_000 ) ,
473- ( Microsecond , "day" ) => NonZeroI64 :: new ( 86_400_000_000 ) ,
474-
475- ( Nanosecond , "microsecond" ) => NonZeroI64 :: new ( 1_000 ) ,
476- ( Nanosecond , "millisecond" ) => NonZeroI64 :: new ( 1_000_000 ) ,
477- ( Nanosecond , "second" ) => NonZeroI64 :: new ( 1_000_000_000 ) ,
478- ( Nanosecond , "minute" ) => NonZeroI64 :: new ( 60_000_000_000 ) ,
479- ( Nanosecond , "hour" ) => NonZeroI64 :: new ( 3_600_000_000_000 ) ,
480- ( Nanosecond , "day" ) => NonZeroI64 :: new ( 86_400_000_000_000 ) ,
521+ ( Second , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60 ) ,
522+ ( Second , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3600 ) ,
523+ ( Second , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86400 ) ,
524+
525+ ( Millisecond , DateTruncGranularity :: Second ) => NonZeroI64 :: new ( 1_000 ) ,
526+ ( Millisecond , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60_000 ) ,
527+ ( Millisecond , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3_600_000 ) ,
528+ ( Millisecond , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86_400_000 ) ,
529+
530+ ( Microsecond , DateTruncGranularity :: Millisecond ) => NonZeroI64 :: new ( 1_000 ) ,
531+ ( Microsecond , DateTruncGranularity :: Second ) => NonZeroI64 :: new ( 1_000_000 ) ,
532+ ( Microsecond , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60_000_000 ) ,
533+ ( Microsecond , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3_600_000_000 ) ,
534+ ( Microsecond , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86_400_000_000 ) ,
535+
536+ ( Nanosecond , DateTruncGranularity :: Microsecond ) => NonZeroI64 :: new ( 1_000 ) ,
537+ ( Nanosecond , DateTruncGranularity :: Millisecond ) => NonZeroI64 :: new ( 1_000_000 ) ,
538+ ( Nanosecond , DateTruncGranularity :: Second ) => NonZeroI64 :: new ( 1_000_000_000 ) ,
539+ ( Nanosecond , DateTruncGranularity :: Minute ) => NonZeroI64 :: new ( 60_000_000_000 ) ,
540+ ( Nanosecond , DateTruncGranularity :: Hour ) => NonZeroI64 :: new ( 3_600_000_000_000 ) ,
541+ ( Nanosecond , DateTruncGranularity :: Day ) => NonZeroI64 :: new ( 86_400_000_000_000 ) ,
481542 _ => None ,
482543 } ;
483544
@@ -502,7 +563,7 @@ fn general_date_trunc(
502563 tu : TimeUnit ,
503564 value : i64 ,
504565 tz : Option < Tz > ,
505- granularity : & str ,
566+ granularity : DateTruncGranularity ,
506567) -> Result < i64 , DataFusionError > {
507568 let scale = match tu {
508569 Second => 1_000_000_000 ,
@@ -516,25 +577,29 @@ fn general_date_trunc(
516577
517578 let result = match tu {
518579 Second => match granularity {
519- "minute" => nano / 1_000_000_000 / 60 * 60 ,
580+ DateTruncGranularity :: Minute => nano / 1_000_000_000 / 60 * 60 ,
520581 _ => nano / 1_000_000_000 ,
521582 } ,
522583 Millisecond => match granularity {
523- "minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60 ,
524- "second" => nano / 1_000_000 / 1_000 * 1_000 ,
584+ DateTruncGranularity :: Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60 ,
585+ DateTruncGranularity :: Second => nano / 1_000_000 / 1_000 * 1_000 ,
525586 _ => nano / 1_000_000 ,
526587 } ,
527588 Microsecond => match granularity {
528- "minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000 ,
529- "second" => nano / 1_000 / 1_000_000 * 1_000_000 ,
530- "millisecond" => nano / 1_000 / 1_000 * 1_000 ,
589+ DateTruncGranularity :: Minute => {
590+ nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
591+ }
592+ DateTruncGranularity :: Second => nano / 1_000 / 1_000_000 * 1_000_000 ,
593+ DateTruncGranularity :: Millisecond => nano / 1_000 / 1_000 * 1_000 ,
531594 _ => nano / 1_000 ,
532595 } ,
533596 _ => match granularity {
534- "minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60 ,
535- "second" => nano / 1_000_000_000 * 1_000_000_000 ,
536- "millisecond" => nano / 1_000_000 * 1_000_000 ,
537- "microsecond" => nano / 1_000 * 1_000 ,
597+ DateTruncGranularity :: Minute => {
598+ nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
599+ }
600+ DateTruncGranularity :: Second => nano / 1_000_000_000 * 1_000_000_000 ,
601+ DateTruncGranularity :: Millisecond => nano / 1_000_000 * 1_000_000 ,
602+ DateTruncGranularity :: Microsecond => nano / 1_000 * 1_000 ,
538603 _ => nano,
539604 } ,
540605 } ;
@@ -554,7 +619,9 @@ fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
554619mod tests {
555620 use std:: sync:: Arc ;
556621
557- use crate :: datetime:: date_trunc:: { date_trunc_coarse, DateTruncFunc } ;
622+ use crate :: datetime:: date_trunc:: {
623+ date_trunc_coarse, DateTruncFunc , DateTruncGranularity ,
624+ } ;
558625
559626 use arrow:: array:: cast:: as_primitive_array;
560627 use arrow:: array:: types:: TimestampNanosecondType ;
@@ -655,7 +722,8 @@ mod tests {
655722 cases. iter ( ) . for_each ( |( original, granularity, expected) | {
656723 let left = string_to_timestamp_nanos ( original) . unwrap ( ) ;
657724 let right = string_to_timestamp_nanos ( expected) . unwrap ( ) ;
658- let result = date_trunc_coarse ( granularity, left, None ) . unwrap ( ) ;
725+ let granularity_enum = DateTruncGranularity :: from_str ( granularity) . unwrap ( ) ;
726+ let result = date_trunc_coarse ( granularity_enum, left, None ) . unwrap ( ) ;
659727 assert_eq ! ( result, right, "{original} = {expected}" ) ;
660728 } ) ;
661729 }
0 commit comments