apache · mgaido91 · Dec 17, 2017 · Jan 9, 2018 · Jan 9, 2018 · Jan 10, 2018
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.Literal._
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 
@@ -42,8 +43,10 @@ import org.apache.spark.sql.types._
  *   e1 / e2      p1 - s1 + s2 + max(6, s1 + p2 + 1)      max(6, s1 + p2 + 1)
  *   e1 % e2      min(p1-s1, p2-s2) + max(s1, s2)         max(s1, s2)
  *   e1 union e2  max(s1, s2) + max(p1-s1, p2-s2)         max(s1, s2)
- *   sum(e1)      p1 + 10                                 s1
- *   avg(e1)      p1 + 4                                  s1 + 4
+ *
+ * When `spark.sql.decimalOperations.allowTruncat` is set to true and the precision / scale needed
+ * are out of the range of available values, the scale is reduced (but never below
+ * min(scale, 6)) in order to prevent the truncation of the integer part of the decimals.
  *
  * To implement the rules for fixed-precision types, we introduce casts to turn them to unlimited
  * precision, do the math on unlimited-precision numbers, then introduce casts back to the
@@ -56,6 +59,7 @@ import org.apache.spark.sql.types._
  * - INT gets turned into DECIMAL(10, 0)
  * - LONG gets turned into DECIMAL(20, 0)
  * - FLOAT and DOUBLE cause fixed-length decimals to turn into DOUBLE
+ * - INT and LONG literals get turned into DECIMAL with just the precision needed by their value
  */
 // scalastyle:on
 object DecimalPrecision extends TypeCoercionRule {
@@ -93,41 +97,76 @@ object DecimalPrecision extends TypeCoercionRule {
     case e: BinaryArithmetic if e.left.isInstanceOf[PromotePrecision] => e
 
     case Add(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) =>
-      val dt = DecimalType.bounded(max(s1, s2) + max(p1 - s1, p2 - s2) + 1, max(s1, s2))
-      CheckOverflow(Add(promotePrecision(e1, dt), promotePrecision(e2, dt)), dt)
+      val resultType = if (SQLConf.get.decimalOperationsAllowTruncat) {
+        val resultScale = max(s1, s2)
+        DecimalType.adjustPrecisionScale(max(p1 - s1, p2 - s2) + resultScale + 1,
+          resultScale)
+      } else {
+        DecimalType.bounded(max(s1, s2) + max(p1 - s1, p2 - s2) + 1, max(s1, s2))
+      }
+      CheckOverflow(Add(promotePrecision(e1, resultType), promotePrecision(e2, resultType)),
+        resultType)
 
     case Subtract(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) =>
-      val dt = DecimalType.bounded(max(s1, s2) + max(p1 - s1, p2 - s2) + 1, max(s1, s2))
-      CheckOverflow(Subtract(promotePrecision(e1, dt), promotePrecision(e2, dt)), dt)
+      val resultType = if (SQLConf.get.decimalOperationsAllowTruncat) {
+        val resultScale = max(s1, s2)
+        DecimalType.adjustPrecisionScale(max(p1 - s1, p2 - s2) + resultScale + 1,
+          resultScale)
+      } else {
+        DecimalType.bounded(max(s1, s2) + max(p1 - s1, p2 - s2) + 1, max(s1, s2))
+      }
+      CheckOverflow(Subtract(promotePrecision(e1, resultType), promotePrecision(e2, resultType)),
+        resultType)
 
     case Multiply(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) =>
-      val resultType = DecimalType.bounded(p1 + p2 + 1, s1 + s2)
+      val resultType = if (SQLConf.get.decimalOperationsAllowTruncat) {
+        DecimalType.adjustPrecisionScale(p1 + p2 + 1, s1 + s2)
+      } else {
+        DecimalType.bounded(p1 + p2 + 1, s1 + s2)
+      }
       val widerType = widerDecimalType(p1, s1, p2, s2)
       CheckOverflow(Multiply(promotePrecision(e1, widerType), promotePrecision(e2, widerType)),
         resultType)
 
     case Divide(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) =>
-      var intDig = min(DecimalType.MAX_SCALE, p1 - s1 + s2)
-      var decDig = min(DecimalType.MAX_SCALE, max(6, s1 + p2 + 1))
-      val diff = (intDig + decDig) - DecimalType.MAX_SCALE
-      if (diff > 0) {
-        decDig -= diff / 2 + 1
-        intDig = DecimalType.MAX_SCALE - decDig
+      val resultType = if (SQLConf.get.decimalOperationsAllowTruncat) {
+        // Precision: p1 - s1 + s2 + max(6, s1 + p2 + 1)
+        // Scale: max(6, s1 + p2 + 1)
+        val intDig = p1 - s1 + s2
+        val scale = max(DecimalType.MINIMUM_ADJUSTED_SCALE, s1 + p2 + 1)
+        val prec = intDig + scale
+        DecimalType.adjustPrecisionScale(prec, scale)
+      } else {
+        var intDig = min(DecimalType.MAX_SCALE, p1 - s1 + s2)
+        var decDig = min(DecimalType.MAX_SCALE, max(6, s1 + p2 + 1))
+        val diff = (intDig + decDig) - DecimalType.MAX_SCALE
+        if (diff > 0) {
+          decDig -= diff / 2 + 1
+          intDig = DecimalType.MAX_SCALE - decDig
+        }
+        DecimalType.bounded(intDig + decDig, decDig)
       }
-      val resultType = DecimalType.bounded(intDig + decDig, decDig)
       val widerType = widerDecimalType(p1, s1, p2, s2)
       CheckOverflow(Divide(promotePrecision(e1, widerType), promotePrecision(e2, widerType)),
         resultType)
 
     case Remainder(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) =>
-      val resultType = DecimalType.bounded(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2))
+      val resultType = if (SQLConf.get.decimalOperationsAllowTruncat) {
+        DecimalType.adjustPrecisionScale(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2))
+      } else {
+        DecimalType.bounded(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2))
+      }
       // resultType may have lower precision, so we cast them into wider type first.
       val widerType = widerDecimalType(p1, s1, p2, s2)
       CheckOverflow(Remainder(promotePrecision(e1, widerType), promotePrecision(e2, widerType)),
         resultType)
 
     case Pmod(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) =>
-      val resultType = DecimalType.bounded(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2))
+      val resultType = if (SQLConf.get.decimalOperationsAllowTruncat) {
+        DecimalType.adjustPrecisionScale(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2))
+      } else {
+        DecimalType.bounded(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2))
+      }
       // resultType may have lower precision, so we cast them into wider type first.
       val widerType = widerDecimalType(p1, s1, p2, s2)
       CheckOverflow(Pmod(promotePrecision(e1, widerType), promotePrecision(e2, widerType)),
@@ -137,9 +176,6 @@ object DecimalPrecision extends TypeCoercionRule {
     e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 =>
       val resultType = widerDecimalType(p1, s1, p2, s2)
       b.makeCopy(Array(Cast(e1, resultType), Cast(e2, resultType)))
-
-    // TODO: MaxOf, MinOf, etc might want other rules
-    // SUM and AVERAGE are handled by the implementations of those expressions
   }
 
   /**
@@ -243,17 +279,43 @@ object DecimalPrecision extends TypeCoercionRule {
     // Promote integers inside a binary expression with fixed-precision decimals to decimals,
     // and fixed-precision decimals in an expression with floats / doubles to doubles
     case b @ BinaryOperator(left, right) if left.dataType != right.dataType =>
-      (left.dataType, right.dataType) match {
-        case (t: IntegralType, DecimalType.Fixed(p, s)) =>
-          b.makeCopy(Array(Cast(left, DecimalType.forType(t)), right))
-        case (DecimalType.Fixed(p, s), t: IntegralType) =>
-          b.makeCopy(Array(left, Cast(right, DecimalType.forType(t))))
-        case (t, DecimalType.Fixed(p, s)) if isFloat(t) =>
-          b.makeCopy(Array(left, Cast(right, DoubleType)))
-        case (DecimalType.Fixed(p, s), t) if isFloat(t) =>
-          b.makeCopy(Array(Cast(left, DoubleType), right))
-        case _ =>
-          b
-      }
+      nondecimalLiteralAndDecimal(b).lift((left, right)).getOrElse(
+        nondecimalNonliteralAndDecimal(b).applyOrElse((left.dataType, right.dataType),
+          (_: (DataType, DataType)) => b))
   }
+
+  /**
+   * Type coercion for BinaryOperator in which one side is an integral literal and the other
+   * side is a decimal. Not defined for any other pair, so callers must supply a fallback.
+   */
+  private def nondecimalLiteralAndDecimal(
+      b: BinaryOperator): PartialFunction[(Expression, Expression), Expression] = {
+    // Promote integral literals inside a binary expression with decimals to decimals. The
+    // cast target comes from DecimalType.forLiteral, i.e. it is chosen from the literal itself.
+    case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType]
+      && l.dataType.isInstanceOf[IntegralType] =>
+      b.makeCopy(Array(Cast(l, DecimalType.forLiteral(l)), r))
+    case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType]
+      && r.dataType.isInstanceOf[IntegralType] =>
+      b.makeCopy(Array(l, Cast(r, DecimalType.forLiteral(r))))
+  }
+
+  /**
+   * Type coercion for BinaryOperator, keyed on the (left, right) data types, in which one side is
+   * a non-decimal numeric and the other side is a fixed-precision decimal.
+   */
+  private def nondecimalNonliteralAndDecimal(
+      b: BinaryOperator): PartialFunction[(DataType, DataType), Expression] = {
+    // Promote integers inside a binary expression with fixed-precision decimals to decimals,
+    // and fixed-precision decimals in an expression with floats / doubles to doubles
+    case (t: IntegralType, DecimalType.Fixed(_, _)) =>
+      b.makeCopy(Array(Cast(b.left, DecimalType.forType(t)), b.right))
+    case (DecimalType.Fixed(_, _), t: IntegralType) =>
+      b.makeCopy(Array(b.left, Cast(b.right, DecimalType.forType(t))))
+    case (t, DecimalType.Fixed(_, _)) if isFloat(t) =>
+      b.makeCopy(Array(b.left, Cast(b.right, DoubleType)))
+    case (DecimalType.Fixed(_, _), t) if isFloat(t) =>
+      b.makeCopy(Array(Cast(b.left, DoubleType), b.right))
+  }
+
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -58,7 +58,7 @@ object Literal {
     case s: Short => Literal(s, ShortType)
     case s: String => Literal(UTF8String.fromString(s), StringType)
     case b: Boolean => Literal(b, BooleanType)
-    case d: BigDecimal => Literal(Decimal(d), DecimalType(Math.max(d.precision, d.scale), d.scale))
+    case d: BigDecimal => Literal(Decimal(d), DecimalType.fromBigDecimal(d))
     case d: JavaBigDecimal =>
       Literal(Decimal(d), DecimalType(Math.max(d.precision, d.scale), d.scale()))
     case d: Decimal => Literal(d, DecimalType(Math.max(d.precision, d.scale), d.scale))

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1048,6 +1048,16 @@ object SQLConf {
     .booleanConf
     .createWithDefault(true)
 
+  val DECIMAL_OPERATIONS_ALLOW_TRUNCAT =
+    buildConf("spark.sql.decimalOperations.allowTruncat")
+      .internal()
+      .doc("When true, establishing the result type of an arithmetic operation happens " +
+        "according to Hive behavior and SQL ANSI 2011 specification, ie. rounding the decimal " +
+        "part of the result if an exact representation is not possible. Otherwise, NULL is " +
+        "returned in those cases, as previously (default).")
+      .booleanConf
+      .createWithDefault(false)
+
   val SQL_STRING_REDACTION_PATTERN =
     ConfigBuilder("spark.sql.redaction.string.regex")
       .doc("Regex to decide which parts of strings produced by Spark contain sensitive " +
@@ -1423,6 +1433,8 @@ class SQLConf extends Serializable with Logging {
 
   def replaceExceptWithFilter: Boolean = getConf(REPLACE_EXCEPT_WITH_FILTER)
 
+  def decimalOperationsAllowTruncat: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_TRUNCAT)
+
   def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE)
 
   def continuousStreamingExecutorPollIntervalMs: Long =

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -23,7 +23,7 @@ import scala.reflect.runtime.universe.typeTag
 
 import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.Expression
+import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
 
 
 /**
@@ -117,6 +117,7 @@ object DecimalType extends AbstractDataType {
   val MAX_SCALE = 38
   val SYSTEM_DEFAULT: DecimalType = DecimalType(MAX_PRECISION, 18)
   val USER_DEFAULT: DecimalType = DecimalType(10, 0)
+  val MINIMUM_ADJUSTED_SCALE = 6
 
   // The decimal types compatible with other numeric types
   private[sql] val ByteDecimal = DecimalType(3, 0)
@@ -136,10 +137,52 @@ object DecimalType extends AbstractDataType {
     case DoubleType => DoubleDecimal
   }
 
+  private[sql] def forLiteral(literal: Literal): DecimalType = literal.value match {
+    case v: Short => fromBigDecimal(BigDecimal(v))
+    case v: Int => fromBigDecimal(BigDecimal(v))
+    case v: Long => fromBigDecimal(BigDecimal(v))
+    case _ => forType(literal.dataType)
+  }
+
+  private[sql] def fromBigDecimal(d: BigDecimal): DecimalType = {
+    DecimalType(Math.max(d.precision, d.scale), d.scale)
+  }
+
   private[sql] def bounded(precision: Int, scale: Int): DecimalType = {
     DecimalType(min(precision, MAX_PRECISION), min(scale, MAX_SCALE))
   }
 
+  /**
+   * Scale adjustment implementation is based on Hive's one, which is itself inspired by
+   * SQL Server's one. In particular, when a result precision is greater than
+   * [[MAX_PRECISION]], the corresponding scale is reduced to prevent the integral part of a
+   * result from being truncated.
+   *
+   * This method is used only when `spark.sql.decimalOperations.allowTruncat` is set to true.
+   *
+   * @param precision the precision needed by the exact result; assumed to be >= `scale`
+   * @param scale the scale needed by the exact result; assumed to be >= 0
+   * @return the requested type if it fits in MAX_PRECISION, otherwise one with a reduced scale
+   */
+  private[sql] def adjustPrecisionScale(precision: Int, scale: Int): DecimalType = {
+    // Assumptions (not checked here, callers must guarantee them):
+    // precision >= scale
+    // scale >= 0
+    if (precision <= MAX_PRECISION) {
+      // Adjustment only needed when we exceed max precision
+      DecimalType(precision, scale)
+    } else {
+      // Precision/scale exceed maximum precision. Result must be adjusted to MAX_PRECISION.
+      val intDigits = precision - scale
+      // If original scale less than MINIMUM_ADJUSTED_SCALE, use original scale value; otherwise
+      // preserve at least MINIMUM_ADJUSTED_SCALE fractional digits
+      val minScaleValue = Math.min(scale, MINIMUM_ADJUSTED_SCALE)
+      val adjustedScale = Math.max(MAX_PRECISION - intDigits, minScaleValue)
+
+      DecimalType(MAX_PRECISION, adjustedScale)
+    }
+  }
+
   override private[sql] def defaultConcreteType: DataType = SYSTEM_DEFAULT
 
   override private[sql] def acceptsType(other: DataType): Boolean = {

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -408,8 +408,8 @@ class AnalysisSuite extends AnalysisTest with Matchers {
     assertExpressionType(sum(Divide(1.0, 2.0)), DoubleType)
     assertExpressionType(sum(Divide(1, 2.0f)), DoubleType)
     assertExpressionType(sum(Divide(1.0f, 2)), DoubleType)
-    assertExpressionType(sum(Divide(1, Decimal(2))), DecimalType(31, 11))
-    assertExpressionType(sum(Divide(Decimal(1), 2)), DecimalType(31, 11))
+    assertExpressionType(sum(Divide(1, Decimal(2))), DecimalType(22, 11))
+    assertExpressionType(sum(Divide(Decimal(1), 2)), DecimalType(26, 6))
     assertExpressionType(sum(Divide(Decimal(1), 2.0)), DoubleType)
     assertExpressionType(sum(Divide(1.0, Decimal(2.0))), DoubleType)
   }

diff --git a/sql/core/src/test/resources/sql-tests/inputs/decimals.sql b/sql/core/src/test/resources/sql-tests/inputs/decimals.sql
@@ -0,0 +1,39 @@
+--
+--   Licensed to the Apache Software Foundation (ASF) under one or more
+--   contributor license agreements.  See the NOTICE file distributed with
+--   this work for additional information regarding copyright ownership.
+--   The ASF licenses this file to You under the Apache License, Version 2.0
+--   (the "License"); you may not use this file except in compliance with
+--   the License.  You may obtain a copy of the License at
+--
+--      http://www.apache.org/licenses/LICENSE-2.0
+--
+--   Unless required by applicable law or agreed to in writing, software
+--   distributed under the License is distributed on an "AS IS" BASIS,
+--   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+--   See the License for the specific language governing permissions and
+--   limitations under the License.
+--
+
+-- tests for decimals handling in operations
+create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet;
+
+insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123),
+  (3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789);
+
+-- test decimal operations
+select id, a+b, a-b, a*b, a/b from decimals_test order by id;
+
+-- test operations between decimals and constants
+select id, a*10, b/10 from decimals_test order by id;
+
+-- use rounding instead of returning NULL, according to new Hive's behavior and SQL standard
+set spark.sql.decimalOperations.allowTruncat=true;
+
+-- test decimal operations
+select id, a+b, a-b, a*b, a/b from decimals_test order by id;
+
+-- test operations between decimals and constants
+select id, a*10, b/10 from decimals_test order by id;
+
+drop table decimals_test;