Skip to content

Commit

Permalink
[SPARK-25258][SPARK-23131][SPARK-25176][BUILD] Upgrade Kryo to 4.0.2
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

Upgrade chill to 0.9.3, Kryo to 4.0.2, to get bug fixes and improvements.

The resolved tickets include:
- SPARK-25258 Upgrade kryo package to version 4.0.2
- SPARK-23131 Kryo raises StackOverflow during serializing GLR model
- SPARK-25176 Kryo fails to serialize a parametrised type hierarchy

More details:
https://github.com/twitter/chill/releases/tag/v0.9.3
twitter/chill@cc3910d

## How was this patch tested?

Existing tests.

Closes #22179 from wangyum/SPARK-23131.

Lead-authored-by: Yuming Wang <yumwang@ebay.com>
Co-authored-by: Dongjoon Hyun <dongjoon@apache.org>
Signed-off-by: Sean Owen <sean.owen@databricks.com>
  • Loading branch information
2 people authored and srowen committed Sep 5, 2018
1 parent 458468a commit 3e03303
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,26 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext {
assert(!ser2.getAutoReset)
}

// Regression test for SPARK-25176. There are no assertions in the body: the test passes as
// long as none of the serialize/deserialize calls below throws.
test("SPARK-25176 ClassCastException when writing a Map after previously " +
"reading a Map with different generic type") {
// This test uses the example in https://github.com/EsotericSoftware/kryo/issues/384
// The wildcard import makes HashMap, Map, List and ArrayList below the java.util types
// for the rest of this test body.
import java.util._
val ser = new KryoSerializer(new SparkConf).newInstance().asInstanceOf[KryoSerializerInstance]

// Holder whose two fields refer to the same HashMap[Int, String] instance
// (mapTwo is assigned from mapOne).
class MapHolder {
private val mapOne = new HashMap[Int, String]
private val mapTwo = this.mapOne
}

// Step 1: round-trip the holder, so `ser` has read a Map declared with String values.
val serializedMapHolder = ser.serialize(new MapHolder)
ser.deserialize[MapHolder](serializedMapHolder)

// Step 2: with the same `ser` instance, round-trip a Map whose value type is
// List[String] rather than String; this is the "different generic type" from the test name.
val stringMap = new HashMap[Int, List[String]]
stringMap.put(1, new ArrayList[String])
val serializedMap = ser.serialize[Map[Int, List[String]]](stringMap)
ser.deserialize[HashMap[Int, List[String]]](serializedMap)
}

private def testSerializerInstanceReuse(autoReset: Boolean, referenceTracking: Boolean): Unit = {
val conf = new SparkConf(loadDefaults = false)
.set("spark.kryo.referenceTracking", referenceTracking.toString)
Expand Down
8 changes: 4 additions & 4 deletions dev/deps/spark-deps-hadoop-2.6
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.8.4.jar
chill_2.11-0.8.4.jar
chill-java-0.9.3.jar
chill_2.11-0.9.3.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
Expand Down Expand Up @@ -130,7 +130,7 @@ jsr305-1.3.9.jar
jta-1.1.jar
jtransforms-2.4.0.jar
jul-to-slf4j-1.7.16.jar
kryo-shaded-3.0.3.jar
kryo-shaded-4.0.2.jar
kubernetes-client-3.0.0.jar
kubernetes-model-2.0.0.jar
leveldbjni-all-1.8.jar
Expand All @@ -149,7 +149,7 @@ metrics-jvm-3.1.5.jar
minlog-1.3.0.jar
netty-3.9.9.Final.jar
netty-all-4.1.17.Final.jar
objenesis-2.1.jar
objenesis-2.5.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
Expand Down
8 changes: 4 additions & 4 deletions dev/deps/spark-deps-hadoop-2.7
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.8.4.jar
chill_2.11-0.8.4.jar
chill-java-0.9.3.jar
chill_2.11-0.9.3.jar
commons-beanutils-1.7.0.jar
commons-beanutils-core-1.8.0.jar
commons-cli-1.2.jar
Expand Down Expand Up @@ -132,7 +132,7 @@ jsr305-1.3.9.jar
jta-1.1.jar
jtransforms-2.4.0.jar
jul-to-slf4j-1.7.16.jar
kryo-shaded-3.0.3.jar
kryo-shaded-4.0.2.jar
kubernetes-client-3.0.0.jar
kubernetes-model-2.0.0.jar
leveldbjni-all-1.8.jar
Expand All @@ -151,7 +151,7 @@ metrics-jvm-3.1.5.jar
minlog-1.3.0.jar
netty-3.9.9.Final.jar
netty-all-4.1.17.Final.jar
objenesis-2.1.jar
objenesis-2.5.1.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
opencsv-2.3.jar
Expand Down
8 changes: 4 additions & 4 deletions dev/deps/spark-deps-hadoop-3.1
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ breeze_2.11-0.13.2.jar
calcite-avatica-1.2.0-incubating.jar
calcite-core-1.2.0-incubating.jar
calcite-linq4j-1.2.0-incubating.jar
chill-java-0.8.4.jar
chill_2.11-0.8.4.jar
chill-java-0.9.3.jar
chill_2.11-0.9.3.jar
commons-beanutils-1.9.3.jar
commons-cli-1.2.jar
commons-codec-1.10.jar
Expand Down Expand Up @@ -146,7 +146,7 @@ kerby-config-1.0.1.jar
kerby-pkix-1.0.1.jar
kerby-util-1.0.1.jar
kerby-xdr-1.0.1.jar
kryo-shaded-3.0.3.jar
kryo-shaded-4.0.2.jar
kubernetes-client-3.0.0.jar
kubernetes-model-2.0.0.jar
leveldbjni-all-1.8.jar
Expand All @@ -167,7 +167,7 @@ mssql-jdbc-6.2.1.jre7.jar
netty-3.9.9.Final.jar
netty-all-4.1.17.Final.jar
nimbus-jose-jwt-4.41.1.jar
objenesis-2.1.jar
objenesis-2.5.1.jar
okhttp-2.7.5.jar
okhttp-3.8.1.jar
okio-1.13.0.jar
Expand Down
2 changes: 1 addition & 1 deletion docs/tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ in your operations) and performance. It provides two serialization libraries:
Java serialization is flexible but often quite slow, and leads to large
serialized formats for many classes.
* [Kryo serialization](https://github.com/EsotericSoftware/kryo): Spark can also use
the Kryo library (version 2) to serialize objects more quickly. Kryo is significantly
the Kryo library (version 4) to serialize objects more quickly. Kryo is significantly
faster and more compact than Java serialization (often as much as 10x), but does not support all
`Serializable` types and requires you to *register* the classes you'll use in the program in advance
for best performance.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression

import scala.util.Random

import org.apache.spark.SparkFunSuite
import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.ml.classification.LogisticRegressionSuite._
import org.apache.spark.ml.feature.{Instance, OffsetInstance}
import org.apache.spark.ml.feature.{LabeledPoint, RFormula}
Expand All @@ -29,6 +29,7 @@ import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.random._
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.FloatType
Expand Down Expand Up @@ -1687,6 +1688,14 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
assert(evalSummary.deviance === summary.deviance)
assert(evalSummary.aic === summary.aic)
}

// Regression test for SPARK-23131: fit a GLR model on a single instance and serialize it
// with Kryo. Nothing is asserted; the test passes when serialization completes without throwing.
test("SPARK-23131 Kryo raises StackOverflow during serializing GLR model") {
  // Build the serializer from a SparkConf that does not load defaults.
  val kryo = new KryoSerializer(new SparkConf(loadDefaults = false)).newInstance()
  // One-row training set: label 1.0, weight 1.0, two features.
  val trainingData = Seq(Instance(1.0, 1.0, Vectors.dense(1.0, 7.0))).toDF
  val fittedModel = new GeneralizedLinearRegression().fit(trainingData)
  kryo.serialize[GeneralizedLinearRegressionModel](fittedModel)
}
}

object GeneralizedLinearRegressionSuite {
Expand Down
6 changes: 5 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@
<hive.parquet.version>1.6.0</hive.parquet.version>
<jetty.version>9.3.24.v20180605</jetty.version>
<javaxservlet.version>3.1.0</javaxservlet.version>
<chill.version>0.8.4</chill.version>
<chill.version>0.9.3</chill.version>
<ivy.version>2.4.0</ivy.version>
<oro.version>2.0.8</oro.version>
<codahale.metrics.version>3.1.5</codahale.metrics.version>
Expand Down Expand Up @@ -1770,6 +1770,10 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
</exclusion>
<!-- Exclude the transitive kryo-shaded artifact from this dependency. -->
<exclusion>
<groupId>com.esotericsoftware</groupId>
<artifactId>kryo-shaded</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
Expand Down

0 comments on commit 3e03303

Please sign in to comment.