Skip to content

Commit

Permalink
[SPARK-35561][SQL] Remove leading zeros from empty static number type…
Browse files Browse the repository at this point in the history
… partition

### What changes were proposed in this pull request?

This PR removes leading zeros from static numeric-type partition values when inserting into a partitioned table, including the case where the inserted partition is empty (the query returns no rows).

create table

    CREATE TABLE `table_int` ( `id` INT, `c_string` STRING, `p_int` int)
    USING parquet PARTITIONED BY (p_int);

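insert (behavior before this change: the non-empty insert is normalized to `p_int=11`, while the empty insert keeps its leading zeros as `p_int=00012`)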

    insert overwrite table table_int partition (p_int='00011')
    select 1, 'c string'
    where true ;

|partition|
|---------|
|p_int=11|

    insert overwrite table table_int partition (p_int='00012')
    select 1, 'c string'
    where false ;

|partition|
|---------|
|p_int=00012|

### Why are the changes needed?

This PR makes the resulting partition name consistent whether the inserted partition is empty or non-empty.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Add Unit test

Closes #33291 from dgd-contributor/35561_insert_integer_partition_fail_when_empty.

Authored-by: dgd-contributor <dgd_contributor@viettel.com.vn>
Signed-off-by: Sean Owen <srowen@gmail.com>
  • Loading branch information
dchvn authored and srowen committed Jul 23, 2021
1 parent f61d599 commit fc29c91
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,19 @@ object PartitioningUtils {
*/
def getPathFragment(spec: TablePartitionSpec, partitionSchema: StructType): String = {
// Builds one "name=value" segment for each field of partitionSchema and joins the
// segments with "/". The value for a field is looked up in spec by the field's name.
partitionSchema.map { field =>
// Diff view, removed line: the raw spec value was handed to getPartitionValueString as-is.
escapePathName(field.name) + "=" + getPartitionValueString(spec(field.name))
// Diff view, added lines: the spec value is first passed through
// removeLeadingZerosFromNumberTypePartition together with the field's data type, and the
// result of that call is what gets handed to getPartitionValueString.
escapePathName(field.name) + "=" +
getPartitionValueString(
removeLeadingZerosFromNumberTypePartition(spec(field.name), field.dataType))
}.mkString("/")
}

/**
 * Normalizes the textual form of a static partition value whose column has a numeric type.
 *
 * For byte, short, integer, long, float and double columns the value is parsed with
 * `castPartValueToDesiredType` and rendered back with `toString`, which drops leading zeros
 * (for example "010" becomes "10" and "01.00" becomes "1.0"). Values of every other data
 * type are returned unchanged.
 *
 * @param value    the partition value as written in the partition spec
 * @param dataType the data type of the partition column
 * @return the normalized value, or `value` itself when no normalization applies
 */
def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String =
  dataType match {
    case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType =>
      // castPartValueToDesiredType returns null when the value is DEFAULT_PARTITION_NAME.
      // Calling toString on that result would throw a NullPointerException, so keep the
      // original value in that case instead of dereferencing null.
      Option(castPartValueToDesiredType(dataType, value, null)).fold(value)(_.toString)
    case _ => value
  }

/**
 * Variant of `getPathFragment` that takes the partition columns as attributes.
 *
 * @param spec             the partition spec to render
 * @param partitionColumns the partition columns, converted to a struct schema before rendering
 * @return the path fragment produced by the schema-based `getPathFragment`
 */
def getPathFragment(spec: TablePartitionSpec, partitionColumns: Seq[Attribute]): String = {
  // Convert the attributes to a StructType, then delegate to the schema-based overload.
  val partitionSchema = StructType.fromAttributes(partitionColumns)
  getPathFragment(spec, partitionSchema)
}
Expand Down Expand Up @@ -523,9 +532,9 @@ object PartitioningUtils {
case _ if value == DEFAULT_PARTITION_NAME => null
case NullType => null
case StringType => UTF8String.fromString(unescapePathName(value))
case IntegerType => Integer.parseInt(value)
case ByteType | ShortType | IntegerType => Integer.parseInt(value)
case LongType => JLong.parseLong(value)
case DoubleType => JDouble.parseDouble(value)
case FloatType | DoubleType => JDouble.parseDouble(value)
case _: DecimalType => Literal(new JBigDecimal(value)).value
case DateType =>
Cast(Literal(value), DateType, Some(zoneId.getId)).eval()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,13 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite {
.sparkConf
.set(SQLConf.USE_V1_SOURCE_LIST, "")

test("SPARK-35561: remove leading zeros from empty static number type partition") {
  // Two numeric partition columns whose spec values both carry leading zeros.
  val partitionSchema = new StructType()
    .add("p_int", "int")
    .add("p_float", "float")
  val partitionSpec = Map(
    "p_int" -> "010",
    "p_float" -> "01.00")

  // The rendered fragment must contain the normalized numeric values.
  val fragment = PartitioningUtils.getPathFragment(partitionSpec, partitionSchema)
  assert("p_int=10/p_float=1.0" === fragment)
}

test("read partitioned table - partition key included in Parquet file") {
withTempDir { base =>
for {
Expand Down

0 comments on commit fc29c91

Please sign in to comment.