Make DecimalType minimum adjusted scale configurable
rednaxelafx committed Jan 18, 2019
1 parent 8503aa3 commit 7da0de4
Showing 5 changed files with 215 additions and 25 deletions.
@@ -133,7 +133,7 @@ object DecimalPrecision extends TypeCoercionRule {
// Precision: p1 - s1 + s2 + max(6, s1 + p2 + 1)
// Scale: max(6, s1 + p2 + 1)
val intDig = p1 - s1 + s2
val scale = max(DecimalType.MINIMUM_ADJUSTED_SCALE, s1 + p2 + 1)
val scale = max(DecimalType.minimumAdjustedScale, s1 + p2 + 1)
val prec = intDig + scale
DecimalType.adjustPrecisionScale(prec, scale)
} else {
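Aside (not part of the commit): the division rule in the comment above can be restated as a tiny standalone helper for experimenting with the formula. The name divisionPrecisionScale and the explicit minAdjustedScale parameter are hypothetical; the patched code reads the minimum from SQLConf via DecimalType.minimumAdjustedScale.

// Sketch of the division result-type formula, with the minimum adjusted scale
// passed in explicitly instead of being read from SQLConf.
def divisionPrecisionScale(p1: Int, s1: Int, p2: Int, s2: Int,
    minAdjustedScale: Int = 6): (Int, Int) = {
  val intDig = p1 - s1 + s2                            // integer digits of the result
  val scale = math.max(minAdjustedScale, s1 + p2 + 1)  // fractional digits of the result
  (intDig + scale, scale)                              // (precision, scale) before capping at 38
}

// decimal(38,18) / decimal(38,18) asks for decimal(95,57); adjustPrecisionScale
// (see the DecimalType changes below) then shrinks that to fit MAX_PRECISION = 38.
divisionPrecisionScale(38, 18, 38, 18)                 // => (95, 57)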
@@ -1347,6 +1347,15 @@ object SQLConf {
.booleanConf
.createWithDefault(true)

val DECIMAL_OPERATIONS_MINIMUM_ADJUSTED_SCALE =
buildConf("spark.sql.decimalOperations.minimumAdjustedScale")
.internal()
.doc("Decimal operations' minimum adjusted scale when " +
"spark.sql.decimalOperations.allowPrecisionLoss is true")
.intConf
.checkValue(scale => scale >= 0 && scale < 38, "valid scale should be in [0, 38)")
.createWithDefault(org.apache.spark.sql.types.DecimalType.DEFAULT_MINIMUM_ADJUSTED_SCALE)

val LITERAL_PICK_MINIMUM_PRECISION =
buildConf("spark.sql.legacy.literal.pickMinimumPrecision")
.internal()
@@ -2002,6 +2011,9 @@ class SQLConf extends Serializable with Logging {

def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS)

def decimalOperationsMinimumAdjustedScale: Int =
getConf(DECIMAL_OPERATIONS_MINIMUM_ADJUSTED_SCALE)

def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION)

def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE)
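Aside (not part of the diff): the new entry is an internal conf, but like other SQLConf entries it can be set per session. A minimal usage sketch, assuming a SparkSession named spark:

// Set the new conf through SQL, as the updated test file below does:
spark.sql("SET spark.sql.decimalOperations.minimumAdjustedScale=12")
// ...or through the runtime conf API (value passed as a string):
spark.conf.set("spark.sql.decimalOperations.minimumAdjustedScale", "12")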
@@ -24,6 +24,7 @@ import scala.reflect.runtime.universe.typeTag
import org.apache.spark.annotation.Stable
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.internal.SQLConf

/**
* The data type representing `java.math.BigDecimal` values.
@@ -117,7 +118,7 @@ object DecimalType extends AbstractDataType {
val MAX_SCALE = 38
val SYSTEM_DEFAULT: DecimalType = DecimalType(MAX_PRECISION, 18)
val USER_DEFAULT: DecimalType = DecimalType(10, 0)
val MINIMUM_ADJUSTED_SCALE = 6
val DEFAULT_MINIMUM_ADJUSTED_SCALE = 6

// The decimal types compatible with other numeric types
private[sql] val BooleanDecimal = DecimalType(1, 0)
@@ -153,6 +154,10 @@
DecimalType(min(precision, MAX_PRECISION), min(scale, MAX_SCALE))
}

def minimumAdjustedScale: Int = {
SQLConf.get.decimalOperationsMinimumAdjustedScale
}

/**
* Scale adjustment implementation is based on Hive's one, which is itself inspired to
* SQLServer's one. In particular, when a result precision is greater than
@@ -176,9 +181,9 @@
} else {
// Precision/scale exceed maximum precision. Result must be adjusted to MAX_PRECISION.
val intDigits = precision - scale
// If original scale is less than MINIMUM_ADJUSTED_SCALE, use original scale value; otherwise
// preserve at least MINIMUM_ADJUSTED_SCALE fractional digits
val minScaleValue = Math.min(scale, MINIMUM_ADJUSTED_SCALE)
// If original scale is less than minimumAdjustedScale, use original scale value; otherwise
// preserve at least minimumAdjustedScale fractional digits
val minScaleValue = Math.min(scale, minimumAdjustedScale)
// The resulting scale is the maximum between what is available without causing a loss of
// digits for the integer part of the decimal and the minimum guaranteed scale, which is
// computed above
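Aside (not part of the commit): the adjustment logic above, with the now-configurable minimum passed as a parameter, can be summarized as a small standalone function. This is a simplified sketch — the real method asserts precision >= scale and returns a DecimalType rather than a tuple:

def adjustPrecisionScale(precision: Int, scale: Int, minimumAdjustedScale: Int): (Int, Int) = {
  val MAX_PRECISION = 38
  if (precision <= MAX_PRECISION) {
    // No adjustment needed while the requested precision still fits.
    (precision, scale)
  } else {
    val intDigits = precision - scale
    // Keep the original scale if it is already below the minimum; otherwise
    // guarantee at least minimumAdjustedScale fractional digits.
    val minScaleValue = math.min(scale, minimumAdjustedScale)
    val adjustedScale = math.max(MAX_PRECISION - intDigits, minScaleValue)
    (MAX_PRECISION, adjustedScale)
  }
}

// decimal(38,18) * decimal(38,18) asks for decimal(77,36). With the default minimum
// of 6 this adjusts to decimal(38,6); with the conf set to 12 it keeps twelve
// fractional digits instead, matching query 23 in the regenerated golden file below.
adjustPrecisionScale(77, 36, 6)   // => (38, 6)
adjustPrecisionScale(77, 36, 12)  // => (38, 12)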
@@ -54,6 +54,37 @@ select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.1
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;

-- use a higher minimum adjusted scale and repeat the above
set spark.sql.decimalOperations.minimumAdjustedScale=12;

-- test decimal operations
select id, a+b, a-b, a*b, a/b from decimals_test order by id;

-- test operations between decimals and constants
select id, a*10, b/10 from decimals_test order by id;

-- test operations on constants
select 10.3 * 3.0;
select 10.3000 * 3.0;
select 10.30000 * 30.0;
select 10.300000000000000000 * 3.000000000000000000;
select 10.300000000000000000 * 3.0000000000000000000;
select 2.35E10 * 1.0;

-- arithmetic operations causing an overflow return NULL
select (5e36 + 0.1) + 5e36;
select (-4e36 - 0.1) - 7e36;
select 12345678901234567890.0 * 12345678901234567890.0;
select 1e35 / 0.1;
select 1.2345678901234567890E30 * 1.2345678901234567890E25;

-- arithmetic operations causing an overflow at adjusted scale 7, return NULL
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345;

-- arithmetic operations causing a precision loss are truncated
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;

-- return NULL instead of rounding, according to old Spark versions' behavior
set spark.sql.decimalOperations.allowPrecisionLoss=false;

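Aside: tying the new test block to the earlier sketches — both columns of decimals_test are decimal(38,18), so a / b initially asks for decimal(95,57), and raising the minimum adjusted scale from 6 to 12 changes the adjusted result type from decimal(38,6) to decimal(38,12), which is what the regenerated schema for query 23 below records. Reusing the hypothetical helpers defined above:

val (rawPrec, rawScale) = divisionPrecisionScale(38, 18, 38, 18, minAdjustedScale = 12)  // (95, 57)
adjustPrecisionScale(rawPrec, rawScale, minimumAdjustedScale = 6)   // => (38, 6): default behavior
adjustPrecisionScale(rawPrec, rawScale, minimumAdjustedScale = 12)  // => (38, 12): with the conf set to 12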
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 40
-- Number of queries: 57


-- !query 0
@@ -186,33 +186,33 @@ struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8


-- !query 22
set spark.sql.decimalOperations.allowPrecisionLoss=false
set spark.sql.decimalOperations.minimumAdjustedScale=12
-- !query 22 schema
struct<key:string,value:string>
-- !query 22 output
spark.sql.decimalOperations.allowPrecisionLoss false
spark.sql.decimalOperations.minimumAdjustedScale 12


-- !query 23
select id, a+b, a-b, a*b, a/b from decimals_test order by id
-- !query 23 schema
struct<id:int,(a + b):decimal(38,18),(a - b):decimal(38,18),(a * b):decimal(38,36),(a / b):decimal(38,18)>
struct<id:int,(a + b):decimal(38,17),(a - b):decimal(38,17),(a * b):decimal(38,12),(a / b):decimal(38,12)>
-- !query 23 output
1 1099 -899 NULL 0.1001001001001001
2 24690.246 0 NULL 1
3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123
4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436
1 1099 -899 99900 0.1001001001
2 24690.246 0 152402061.885129 1
3 1234.2234567891011 -1233.9765432108989 152.358023429668 0.000100037914
4 123456789123456790.12345678912345679 123456789123456787.87654321087654321 138698367904130467.51562262075 109890109097814272.043109406191


-- !query 24
select id, a*10, b/10 from decimals_test order by id
-- !query 24 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,19)>
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,15),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18)>
-- !query 24 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.1123456789123456789
4 1234567891234567890 0.112345678912345679


-- !query 25
@@ -242,17 +242,17 @@ struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,
-- !query 28
select 10.300000000000000000 * 3.000000000000000000
-- !query 28 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)>
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)>
-- !query 28 output
30.9


-- !query 29
select 10.300000000000000000 * 3.0000000000000000000
-- !query 29 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)>
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)>
-- !query 29 output
NULL
30.9


-- !query 30
@@ -290,7 +290,7 @@ NULL
-- !query 34
select 1e35 / 0.1
-- !query 34 schema
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)>
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,12)>
-- !query 34 output
NULL

@@ -314,22 +314,164 @@ NULL
-- !query 37
select 123456789123456789.1234567890 * 1.123456789123456789
-- !query 37 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)>
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)>
-- !query 37 output
NULL
138698367904130467.654320988515622621


-- !query 38
select 12345678912345.123456789123 / 0.000000012345678
-- !query 38 schema
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)>
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,12)>
-- !query 38 output
NULL
1000000073899961059796.725866331521


-- !query 39
drop table decimals_test
set spark.sql.decimalOperations.allowPrecisionLoss=false
-- !query 39 schema
struct<>
struct<key:string,value:string>
-- !query 39 output
spark.sql.decimalOperations.allowPrecisionLoss false


-- !query 40
select id, a+b, a-b, a*b, a/b from decimals_test order by id
-- !query 40 schema
struct<id:int,(a + b):decimal(38,18),(a - b):decimal(38,18),(a * b):decimal(38,36),(a / b):decimal(38,18)>
-- !query 40 output
1 1099 -899 NULL 0.1001001001001001
2 24690.246 0 NULL 1
3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123
4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436


-- !query 41
select id, a*10, b/10 from decimals_test order by id
-- !query 41 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,19)>
-- !query 41 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.1123456789123456789


-- !query 42
select 10.3 * 3.0
-- !query 42 schema
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
-- !query 42 output
30.9


-- !query 43
select 10.3000 * 3.0
-- !query 43 schema
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
-- !query 43 output
30.9


-- !query 44
select 10.30000 * 30.0
-- !query 44 schema
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
-- !query 44 output
309


-- !query 45
select 10.300000000000000000 * 3.000000000000000000
-- !query 45 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)>
-- !query 45 output
30.9


-- !query 46
select 10.300000000000000000 * 3.0000000000000000000
-- !query 46 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)>
-- !query 46 output
NULL


-- !query 47
select 2.35E10 * 1.0
-- !query 47 schema
struct<(CAST(2.35E+10 AS DECIMAL(12,1)) * CAST(1.0 AS DECIMAL(12,1))):decimal(6,-7)>
-- !query 47 output
23500000000


-- !query 48
select (5e36 + 0.1) + 5e36
-- !query 48 schema
struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)>
-- !query 48 output
NULL


-- !query 49
select (-4e36 - 0.1) - 7e36
-- !query 49 schema
struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)>
-- !query 49 output
NULL


-- !query 50
select 12345678901234567890.0 * 12345678901234567890.0
-- !query 50 schema
struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)>
-- !query 50 output
NULL


-- !query 51
select 1e35 / 0.1
-- !query 51 schema
struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)>
-- !query 51 output
NULL


-- !query 52
select 1.2345678901234567890E30 * 1.2345678901234567890E25
-- !query 52 schema
struct<(CAST(1.2345678901234567890E+30 AS DECIMAL(25,-6)) * CAST(1.2345678901234567890E+25 AS DECIMAL(25,-6))):decimal(38,-17)>
-- !query 52 output
NULL


-- !query 53
select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345
-- !query 53 schema
struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,7)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,7))):decimal(38,7)>
-- !query 53 output
NULL


-- !query 54
select 123456789123456789.1234567890 * 1.123456789123456789
-- !query 54 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)>
-- !query 54 output
NULL


-- !query 55
select 12345678912345.123456789123 / 0.000000012345678
-- !query 55 schema
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)>
-- !query 55 output
NULL


-- !query 56
drop table decimals_test
-- !query 56 schema
struct<>
-- !query 56 output
