From 6ac786b2633fcb14a93b286f94ffcca04445cff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Till=20D=C3=B6hmen?= Date: Mon, 11 Jul 2022 11:39:37 +0200 Subject: [PATCH] [HOPSWORKS-3233] Timestamp incompatibility Spark/Hive/Hudi - Hive fix - release 3.1.1.3 (#31) --- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- .../kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 10 ++- .../datasources/DaysWritableV2.scala | 76 +++++++++++++++++++ sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- .../spark/sql/hive/HiveInspectors.scala | 23 +++--- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 131 insertions(+), 48 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritableV2.scala diff --git a/assembly/pom.xml b/assembly/pom.xml index eff71baf2bf71..216e29b770fca 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index f69d9d2542119..a883136c978da 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 60cd65b5ce626..3da0aef959878 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 79c71c54758bb..f34b51545d113 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 8f24f0fef1933..8009590119d9f 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index df62103c5c620..da0ca3f5361f2 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 864de33d85924..9e11b45053c87 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index df988839878df..665f0eee411f3 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 777a6029f6f91..1482b5ba73037 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index aa895704f57c9..6912a24f42b23 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 24b29af105729..81b5c3d688ffa 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 62b35f59e5216..fb1658045c4df 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index bbe97fd0fd535..59828b5cc3478 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index b4223b97d5273..23310e2a5eaf9 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 65259af9771ff..6ece409cb6905 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 067eed45c7d35..cd03ade0e34a9 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index cbb66367b0712..1d88784536c7f 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 070fc509ad16c..82afbe4b75baa 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index e23bc8f71c203..28d1ebebe3418 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 8d6f86a0ddf41..edfcd4abbdd5f 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 569965b753d90..3d74a40d20ab7 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 978ea94c0323f..5b792daaf6143 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 0982319e9a887..d0c997c6ef8f1 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 7da19d7e7d00b..ec24f8d13028e 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/pom.xml b/pom.xml index 7d2e34790ddde..4b78a2a98d20b 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 03781c87894ca..bf85654fd3df2 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 6aefd861b251f..41e0d66f7997b 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 2f194fcb2db85..0ba85029a012d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index b4f46d044a229..7753147f42e38 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 80a6a4e12279b..bc515a840ba0c 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 262700e8e0ed1..18cbdeb4f85c6 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9623d8de52e06..aca8e472c71f1 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml @@ -97,6 +97,14 @@ ${hive.group} hive-storage-api + + ${hive.group} + hive-common + + + ${hive.group} + hive-serde + org.apache.parquet parquet-column diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritableV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritableV2.scala new file mode 100644 index 0000000000000..21cba287aba50 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DaysWritableV2.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import java.io.{DataInput, DataOutput, IOException} + +import org.apache.hadoop.hive.common.`type`.Date +import org.apache.hadoop.hive.serde2.io.DateWritableV2 +import org.apache.hadoop.io.WritableUtils +import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseJulianToGregorianDays} + + +/** + * The class accepts/returns days in Gregorian calendar and rebase them + * via conversion to local date in Julian calendar for dates before 1582-10-15 + * in read/write for backward compatibility with Spark 2.4 and earlier versions. + * + * @param gregorianDays The number of days since the epoch 1970-01-01 in + * Gregorian calendar. + * @param julianDays The number of days since the epoch 1970-01-01 in + * Julian calendar. + */ +class DaysWritableV2( + var gregorianDays: Int, + var julianDays: Int) + extends DateWritableV2 { + + def this() = this(0, 0) + def this(gregorianDays: Int) = + this(gregorianDays, rebaseGregorianToJulianDays(gregorianDays)) + def this(dateWritable: DateWritableV2) = { + this( + gregorianDays = dateWritable match { + case daysWritable: DaysWritableV2 => daysWritable.gregorianDays + case dateWritable: DateWritableV2 => + rebaseJulianToGregorianDays(dateWritable.getDays) + }, + julianDays = dateWritable.getDays) + } + + override def getDays: Int = julianDays + override def get: Date = { + Date.ofEpochMilli(DateWritableV2.daysToMillis(julianDays)) + } + + override def set(d: Int): Unit = { + gregorianDays = d + julianDays = rebaseGregorianToJulianDays(d) + } + + @throws[IOException] + override def write(out: DataOutput): Unit = { + WritableUtils.writeVInt(out, julianDays) + } + + @throws[IOException] + override def readFields(in: DataInput): Unit = { + julianDays = WritableUtils.readVInt(in) + gregorianDays = rebaseJulianToGregorianDays(julianDays) + } +} diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 1e470c08aa65c..2887c4607acaa 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 896a727bba5af..3dcc8c6f1f5fb 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../../pom.xml diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 8f4e3eab96d22..a8e3cbbad98ff 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -20,19 +20,17 @@ package org.apache.spark.sql.hive import java.lang.reflect.{ParameterizedType, Type, WildcardType} import scala.collection.JavaConverters._ - import org.apache.hadoop.{io => hadoopIo} -import org.apache.hadoop.hive.common.`type`.{HiveChar, HiveDecimal, HiveVarchar} +import org.apache.hadoop.hive.common.`type`.{HiveChar, HiveDecimal, HiveVarchar, Timestamp} import org.apache.hadoop.hive.serde2.{io => hiveIo} import org.apache.hadoop.hive.serde2.objectinspector.{StructField => HiveStructField, _} import org.apache.hadoop.hive.serde2.objectinspector.primitive._ import org.apache.hadoop.hive.serde2.typeinfo.{DecimalTypeInfo, TypeInfoFactory} - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.execution.datasources.DaysWritable +import org.apache.spark.sql.execution.datasources.DaysWritableV2 import org.apache.spark.sql.types import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -91,7 +89,7 @@ import org.apache.spark.unsafe.types.UTF8String * org.apache.hadoop.hive.serde2.io.ByteWritable * org.apache.hadoop.io.BytesWritable * org.apache.hadoop.hive.serde2.io.DateWritable - * org.apache.hadoop.hive.serde2.io.TimestampWritable + * org.apache.hadoop.hive.serde2.io.TimestampWritableV2 * org.apache.hadoop.hive.serde2.io.HiveDecimalWritable * Complex Type * List: Object[] / java.util.List @@ -188,8 +186,8 @@ private[hive] trait HiveInspectors { case c: Class[_] if c == classOf[hiveIo.HiveDecimalWritable] => DecimalType.SYSTEM_DEFAULT case c: Class[_] if c == classOf[hiveIo.ByteWritable] => ByteType case c: Class[_] if c == classOf[hiveIo.ShortWritable] => ShortType - case c: Class[_] if c == classOf[hiveIo.DateWritable] => DateType - case c: Class[_] if c == classOf[hiveIo.TimestampWritable] => TimestampType + case c: Class[_] if c == classOf[hiveIo.DateWritableV2] => DateType + case c: Class[_] if c == classOf[hiveIo.TimestampWritableV2] => TimestampType case c: Class[_] if c == classOf[hadoopIo.Text] => StringType case c: Class[_] if c == classOf[hadoopIo.IntWritable] => IntegerType case c: Class[_] if c == classOf[hadoopIo.LongWritable] => LongType @@ -619,7 +617,7 @@ private[hive] trait HiveInspectors { case x: DateObjectInspector if x.preferWritable() => data: Any => { if (data != null) { - new DaysWritable(x.getPrimitiveWritableObject(data).getDays).gregorianDays + new DaysWritableV2(x.getPrimitiveWritableObject(data).getDays).gregorianDays } else { null } @@ -1013,18 +1011,19 @@ private[hive] trait HiveInspectors { new hadoopIo.BytesWritable(value.asInstanceOf[Array[Byte]]) } - private def getDateWritable(value: Any): DaysWritable = + private def getDateWritable(value: Any): DaysWritableV2 = if (value == null) { null } else { - new DaysWritable(value.asInstanceOf[Int]) + new DaysWritableV2(value.asInstanceOf[Int]) } - private def getTimestampWritable(value: Any): hiveIo.TimestampWritable = + private def getTimestampWritable(value: Any): hiveIo.TimestampWritableV2 = if (value == null) { null } else { - new hiveIo.TimestampWritable(DateTimeUtils.toJavaTimestamp(value.asInstanceOf[Long])) + val ts = DateTimeUtils.toJavaTimestamp(value.asInstanceOf[Long]); + new hiveIo.TimestampWritableV2(Timestamp.ofEpochMilli(ts.getTime)); } private def getDecimalWritable(value: Any): hiveIo.HiveDecimalWritable = diff --git a/streaming/pom.xml b/streaming/pom.xml index 224d8c1f6c384..27887ecf8e4e6 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index a0aa9ebe5c8a9..ee243e62bca1d 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.1.2 + 3.1.1.3 ../pom.xml