
Commit eadf1a4

[SPARK-52848][SQL] Avoid cast to Double in casting TIME/TIMESTAMP to DECIMAL
### What changes were proposed in this pull request?

In the PR, I propose to simplify casting TIME/TIMESTAMP to DECIMAL, and avoid intermediate casting to Double.

### Why are the changes needed?

To avoid unnecessary arithmetic operations and to improve code maintenance.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

By running the affected test suites:
```
$ build/sbt "test:testOnly *CastWithAnsiOnSuite"
$ build/sbt "test:testOnly *CastWithAnsiOffSuite"
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z cast.sql"
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51539 from MaxGekk/cast-dec-avoid-double.

Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
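For intuition, here is a minimal, self-contained Scala sketch of why the old route through `Double` can lose digits while the unscaled-Long route is exact. It uses plain JDK `BigDecimal`, not Spark's internal `Decimal`, and the sample timestamp value is made up for illustration:

```scala
// Hypothetical TIMESTAMP value near the top of the supported range:
// 9999-12-31 23:59:59.999999 UTC, stored as microseconds in a Long.
val micros = 253402300799999999L

// Old path: divide as Double first. A Double mantissa holds ~15-16 decimal
// digits, but this value needs 18, so the sub-second digits are corrupted
// before any Decimal is even built.
val viaDouble = java.math.BigDecimal.valueOf(micros / 1e6)

// New path: treat the Long as an unscaled value with scale 6. No
// floating-point arithmetic is involved, so all 18 digits survive.
val exact = java.math.BigDecimal.valueOf(micros, 6)

println(exact.toPlainString)     // 253402300799.999999
println(viaDouble.toPlainString) // fractional part is no longer exact
```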
1 parent 197c9d6 · commit eadf1a4

3 files changed: +15 −19 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 11 additions & 15 deletions

```diff
@@ -731,9 +731,6 @@ case class Cast(
   private[this] def timestampToDouble(ts: Long): Double = {
     ts / MICROS_PER_SECOND.toDouble
   }
-  private[this] def timeToDouble(timeNanos: Long): Double = {
-    timeNanos / NANOS_PER_SECOND.toDouble
-  }
   private[this] def timeToLong(timeNanos: Long): Long = {
     Math.floorDiv(timeNanos, NANOS_PER_SECOND)
   }
@@ -1047,11 +1044,15 @@ case class Cast(
       b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target, getContextOrNull()))
     case DateType =>
       buildCast[Int](_, d => null) // date can't cast to decimal in Hive
-    case TimestampType =>
-      // Note that we lose precision here.
-      buildCast[Long](_, t => changePrecision(Decimal(timestampToDouble(t)), target))
-    case _: TimeType =>
-      buildCast[Long](_, t => changePrecision(Decimal(timeToDouble(t)), target))
+    case TimestampType => buildCast[Long](_, t => changePrecision(
+      // 19 digits is enough to represent any TIMESTAMP value in Long.
+      // 6 digits of scale is for microseconds precision of TIMESTAMP values.
+      Decimal.apply(t, 19, 6), target))
+    case _: TimeType => buildCast[Long](_, t => changePrecision(
+      // 14 digits is enough to cover the full range of TIME value [0, 24:00) which is
+      // [0, 24 * 60 * 60 * 1000 * 1000 * 1000) = [0, 86400000000000).
+      // 9 digits of scale is for nanoseconds precision of TIME values.
+      Decimal.apply(t, precision = 14, scale = 9), target))
     case dt: DecimalType =>
       b => toPrecision(b.asInstanceOf[Decimal], target, getContextOrNull())
     case t: IntegralType =>
@@ -1510,18 +1511,15 @@ case class Cast(
       // date can't cast to decimal in Hive
       (c, evPrim, evNull) => code"$evNull = true;"
     case TimestampType =>
-      // Note that we lose precision here.
       (c, evPrim, evNull) =>
         code"""
-          Decimal $tmp = Decimal.apply(
-            scala.math.BigDecimal.valueOf(${timestampToDoubleCode(c)}));
+          Decimal $tmp = Decimal.apply($c, 19, 6);
           ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)}
         """
     case _: TimeType =>
       (c, evPrim, evNull) =>
         code"""
-          Decimal $tmp = Decimal.apply(
-            scala.math.BigDecimal.valueOf(${timeToDoubleCode(c)}));
+          Decimal $tmp = Decimal.apply($c, 14, 9);
           ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)}
         """
     case DecimalType() =>
@@ -1771,8 +1769,6 @@ case class Cast(
   private[this] def timestampToDoubleCode(ts: ExprValue): Block =
     code"$ts / (double)$MICROS_PER_SECOND"
 
-  private[this] def timeToDoubleCode(ts: ExprValue): Block =
-    code"$ts / (double)$NANOS_PER_SECOND"
   private[this] def timeToLongCode(timeValue: ExprValue): Block =
     code"Math.floorDiv($timeValue, ${NANOS_PER_SECOND}L)"
 
```
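For reference, `Decimal.apply(unscaled, precision, scale)` on `org.apache.spark.sql.types.Decimal` interprets the `Long` as an unscaled value, placing the decimal point `scale` digits from the right. A rough sketch of the semantics (assumes `spark-catalyst` on the classpath; the sample values are made up):

```scala
import org.apache.spark.sql.types.Decimal

// A TIMESTAMP's internal Long counts microseconds, so scale 6 turns it into
// seconds with microsecond precision:
val ts = Decimal(1721001600123456L, 19, 6)
println(ts)   // 1721001600.123456

// A TIME's internal Long counts nanoseconds, so scale 9 turns it into
// seconds with nanosecond precision:
val time = Decimal(86399123456789L, 14, 9)
println(time) // 86399.123456789
```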

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastWithAnsiOnSuite.scala

Lines changed: 1 addition & 1 deletion

```diff
@@ -810,7 +810,7 @@ class CastWithAnsiOnSuite extends CastSuiteBase with QueryErrorsBase {
       ),
       condition = "NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION",
       parameters = Map(
-        "value" -> "86399.123456",
+        "value" -> "86399.123456000",
        "precision" -> "2",
        "scale" -> "0",
        "config" -> """"spark.sql.ansi.enabled""""
```

sql/core/src/test/resources/sql-tests/results/cast.sql.out

Lines changed: 3 additions & 3 deletions

```diff
@@ -2305,7 +2305,7 @@ org.apache.spark.SparkArithmeticException
     "config" : "\"spark.sql.ansi.enabled\"",
     "precision" : "1",
     "scale" : "0",
-    "value" : "60.0"
+    "value" : "60.000000000"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -2330,7 +2330,7 @@ org.apache.spark.SparkArithmeticException
     "config" : "\"spark.sql.ansi.enabled\"",
     "precision" : "3",
     "scale" : "0",
-    "value" : "3600.0"
+    "value" : "3600.000000000"
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -2355,7 +2355,7 @@ org.apache.spark.SparkArithmeticException
     "config" : "\"spark.sql.ansi.enabled\"",
     "precision" : "5",
     "scale" : "2",
-    "value" : "36000.0"
+    "value" : "36000.000000000"
   },
   "queryContext" : [ {
     "objectType" : "",
```
